python学习笔记 python实现k-means聚类

时间:2023-03-09 06:43:47
python学习笔记 python实现k-means聚类
 # -*- coding: utf-8 -*-
"""
Created on Thu Mar 16 14:52:58 2017

@author: Jarvis
"""
import tensorflow as tf
import numpy as np
import pandas as pd
import math
import random
from pandas import Series,DataFrame
def cal_dis(a, b):
    """Return the Euclidean distance between two vectors.

    Args:
        a: Iterable of numbers.
        b: Iterable of numbers, paired element-wise with ``a``.

    Returns:
        The square root of the summed squared element-wise differences.
        Elements are paired with ``zip``, so trailing elements of the
        longer input (if any) are ignored.
    """
    # A generator expression avoids shadowing the builtin ``sum``.
    return math.sqrt(sum((x - y) * (x - y) for x, y in zip(a, b)))


def is_same_series(a, b):
    """Return True when ``a`` and ``b`` hold equal values in the same order.

    Args:
        a: Sized iterable of values.
        b: Sized iterable of values.

    Returns:
        False if the lengths differ or any paired elements compare
        unequal, True otherwise (two empty inputs are considered equal).
    """
    # zip() stops at the shorter input, so without this check a strict
    # prefix would wrongly compare as "same".
    if len(a) != len(b):
        return False
    return all(x == y for x, y in zip(a, b))
def is_constant_vec(a, b):
    """Return True when two centroid lists are element-wise identical.

    Args:
        a: List of vectors (for example the previous centroids).
        b: List of vectors (for example the current centroids).

    Returns:
        False if either list is empty, if the lists differ in length, or
        if any paired vectors differ according to ``is_same_series``;
        True otherwise.
    """
    # An empty list means "no previous state yet", which never counts as
    # converged.
    if len(a) == 0 or len(b) == 0:
        return False
    if len(a) != len(b):
        return False
    return all(is_same_series(x, y) for x, y in zip(a, b))


def init_typeSet(init_set, type_num):
    """Append ``type_num`` new empty lists to ``init_set`` and return it.

    Args:
        init_set: List to extend in place.
        type_num: Number of empty member lists to append.

    Returns:
        The same ``init_set`` object, after extension.
    """
    # Each appended list is a distinct object so clusters never alias.
    init_set.extend([] for _ in range(type_num))
    return init_set


def Mindis_type_no(x, vecs, typ_num=None):
    """Return the index of the vector in ``vecs`` closest to ``x``.

    Distance is measured with ``cal_dis``. Ties keep the lowest index.

    Args:
        x: The vector to classify.
        vecs: Non-empty sequence of candidate vectors (centroids).
        typ_num: How many leading entries of ``vecs`` to consider.
            ``None`` (the default) considers all of them; larger values
            are clamped to ``len(vecs)``.

    Returns:
        The integer index of the nearest candidate.

    Raises:
        IndexError: If ``vecs`` is empty.
    """
    # The parameter used to be ignored in favour of a global named
    # ``type_num``; derive the count from the arguments instead.
    count = len(vecs) if typ_num is None else min(typ_num, len(vecs))
    best_index = 0
    best_dist = cal_dis(x, vecs[0])
    for index in range(1, count):
        dist = cal_dis(x, vecs[index])
        if dist < best_dist:
            best_index = index
            best_dist = dist
    return best_index


def reduce_mean(vecs):
    """Return the element-wise mean of a list of vectors.

    Args:
        vecs: Non-empty sequence of vectors that support ``copy()``,
            element-wise ``+`` and division by an int (the script passes
            pandas rows).

    Returns:
        A new vector holding the element-wise mean; the inputs are not
        modified.

    Raises:
        ValueError: If ``vecs`` is empty.
    """
    if len(vecs) == 0:
        raise ValueError("reduce_mean() requires at least one vector")
    # Copy first so accumulating never touches the caller's first vector.
    total = vecs[0].copy()
    for vec in vecs[1:]:
        total = total + vec
    return total / len(vecs)
def get_vecs(sets):
    """Return the rows of the module-level ``data`` frame listed in ``sets``.

    Args:
        sets: Iterable of integer row positions.

    Returns:
        A list with one row (pandas Series) per entry of ``sets``, in the
        same order.
    """
    # ``DataFrame.ix`` no longer exists in current pandas. The CSV is read
    # without an index column, so row labels are 0..n-1 and positional
    # ``iloc`` selects the same rows.
    return [data.iloc[i] for i in sets]


raw_data_file = pd.read_csv('NDVI_NDWI_all.csv', header=None, encoding='gbk')
data = raw_data_file
# Drop the columns labelled 0, 1 and 2 so they take no part in the
# distance computation (presumably non-feature columns -- confirm
# against the CSV layout).
del data[0]
del data[1]
del data[2]

type_num = 4
init_type_vec = []  # current cluster centroids
data_size = len(data)

# Seed the centroids with distinct, randomly chosen rows. random.sample
# raises ValueError when there are fewer rows than clusters instead of
# looping forever.
for i in random.sample(range(data_size), type_num):
    init_type_vec.append(data.iloc[i])

pre_vec = []  # centroids from the previous pass; empty before the first

# Repeat assignment and update until the centroids stop changing.
while not is_constant_vec(pre_vec, init_type_vec):
    # Assignment step: put every row index into its nearest cluster.
    type_set = init_typeSet([], type_num)
    for j in range(data_size):
        tmp_type = Mindis_type_no(data.iloc[j], init_type_vec)
        type_set[tmp_type].append(j)

    # Shallow copy is enough: the update below rebinds list slots to new
    # vectors rather than mutating the old ones.
    pre_vec = list(init_type_vec)

    # Update step: move each centroid to the mean of its members.
    for i in range(type_num):
        if not type_set[i]:
            # An empty cluster has no mean; keep its previous centroid.
            continue
        need_cal_vecs = get_vecs(type_set[i])
        init_type_vec[i] = reduce_mean(need_cal_vecs).copy()

for i in range(type_num):
    print('--------------------------------')
    print(type_set[i])

# Write the final membership of every cluster to the output file; the
# handle must be bound and passed to print(), otherwise the file stays
# empty.
with open('output.txt', "w") as out_file:
    for i in range(type_num):
        print("type %d" % i, file=out_file)
        for j in type_set[i]:
            print(j, file=out_file)