k-均值聚类算法二维实例,不多解释,解释就是掩饰,复制粘贴即可运行。
import time
import numpy as np
import random
import matplotlib.pyplot as plt
import operator
def func01(clusters=None, seed=None):
    """Generate random 2-D integer points drawn from rectangular clusters.

    NOTE(review): every numeric literal of the original listing (the
    coordinate ranges and point counts of its six clusters) was lost during
    extraction.  The default boxes below are placeholders that only keep the
    "six clusters" structure -- confirm them against the original article.

    Args:
        clusters: optional iterable of ``(x_min, x_max, y_min, y_max, count)``
            tuples; bounds are inclusive, as with ``random.randint``.
            ``None`` selects the placeholder defaults.
        seed: optional seed for a private generator.  ``None`` uses the
            module-level ``random`` state, i.e. unseeded like the original
            (whose ``random.seed`` call was commented out).

    Returns:
        A flat list of ``[x, y]`` integer pairs, cluster after cluster.
    """
    if clusters is None:
        # Placeholder layout: six separated boxes of 50 points each.
        clusters = (
            (0, 30, 0, 30, 50),
            (60, 90, 0, 30, 50),
            (120, 150, 0, 30, 50),
            (0, 30, 60, 90, 50),
            (60, 90, 60, 90, 50),
            (120, 150, 60, 90, 50),
        )
    rng = random if seed is None else random.Random(seed)
    points = []
    for x_min, x_max, y_min, y_max, count in clusters:
        # x is drawn before y for every point, as in the original listing.
        points.extend(
            [rng.randint(x_min, x_max), rng.randint(y_min, y_max)]
            for _ in range(count)
        )
    return points
def func02(kjz2w, pause=1.0):
    """Plot grouped 2-D points, one colour per group, then close the figure.

    Args:
        kjz2w: list of groups, each a list of ``[x, y]`` points.  Nothing is
            drawn when the list is empty.
        pause: seconds the figure stays on screen.
            NOTE(review): the original pause duration was lost in
            extraction; 1.0 is an assumed default.
    """
    if not kjz2w:
        return
    colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
    for index, group in enumerate(kjz2w):
        if not group:
            continue
        # One plot call per group instead of one per point: same markers,
        # far fewer artists for matplotlib to manage.
        plt.plot(
            [point[0] for point in group],
            [point[1] for point in group],
            color=colors[index % len(colors)],  # colours repeat after seven groups
            marker='.',
            linestyle='None',
        )
    plt.ion()
    plt.show()
    plt.pause(pause)
    plt.close()
def func03(kjz1, k):
    """Choose k initial centres and group the points by nearest centre.

    The centres are the bounding-box minimum corner, the maximum corner, and
    ``k - 2`` points spaced along the diagonal between them, in that order.

    Args:
        kjz1: non-empty list of ``[x, y]`` points.
        k: number of initial centres; must be at least 1.

    Returns:
        A list of k groups in centre order, each a list of the original
        point objects.  A group is empty when no point is closest to its
        centre.

    Raises:
        ValueError: if ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be at least 1")
    if k == 1:
        # The min/max corners alone would already give two centres.
        return [list(kjz1)]

    low = np.min(kjz1, axis=0).tolist()
    high = np.max(kjz1, axis=0).tolist()
    step_x = (high[0] - low[0]) / k
    step_y = (high[1] - low[1]) / k

    centres = [[low[0], low[1]], [high[0], high[1]]]
    centres.extend(
        [low[0] + step_x * j, low[1] + step_y * j] for j in range(1, k - 1)
    )

    def squared_distance(point, centre):
        return (point[0] - centre[0]) ** 2 + (point[1] - centre[1]) ** 2

    groups = [[] for _ in centres]
    for point in kjz1:
        # min() keeps the first of equally near centres, matching the
        # stable ascending sort used by the original.
        nearest = min(
            range(len(centres)),
            key=lambda index: squared_distance(point, centres[index]),
        )
        groups[nearest].append(point)
    return groups
def func05(lb2):
    """Remove every empty group from ``lb2`` in place.

    The original index-walking loop read ``lb2[0]`` unconditionally and so
    raised ``IndexError`` for an empty outer list; filtering handles that.

    Args:
        lb2: list of groups (sized containers).

    Returns:
        The same list object, now holding only the non-empty groups.
    """
    # Slice assignment keeps the caller's list object, as pop() did.
    lb2[:] = [group for group in lb2 if len(group) > 0]
    return lb2
def func06(kjz2wxy):
    """Perform one k-means step: compute group means, then regroup points.

    Empty input groups are skipped.  The original passed them to
    ``np.mean``, which returns NaN for an empty list and then failed when
    that value was indexed.

    Args:
        kjz2wxy: list of groups, each a list of ``[x, y]`` points.

    Returns:
        A tuple ``(groups, meanxy)``.  ``meanxy`` holds the ``[x, y]`` mean
        of each non-empty input group.  ``groups`` holds the points
        reassigned to their nearest mean, with empty groups removed, so it
        can be shorter than ``meanxy``.
    """
    populated = [group for group in kjz2wxy if len(group) > 0]
    meanxy = [np.mean(group, axis=0).tolist() for group in populated]

    def squared_distance(point, centre):
        return (point[0] - centre[0]) ** 2 + (point[1] - centre[1]) ** 2

    regrouped = [[] for _ in meanxy]
    for group in populated:
        for point in group:
            # min() keeps the first of equally near means, matching the
            # stable ascending sort used by the original.
            nearest = min(
                range(len(meanxy)),
                key=lambda index: squared_distance(point, meanxy[index]),
            )
            regrouped[nearest].append(point)

    # Drop groups that lost all their points in this step.
    regrouped = [group for group in regrouped if group]
    return regrouped, meanxy
def func07(kjz2w, fz):
    """Run k-means on the points until the group means stop changing.

    Builds the initial grouping with ``func03``, applies ``func06``
    repeatedly until two consecutive steps report identical means, prints
    the iteration count and plots the final groups with ``func02``.

    Args:
        kjz2w: list of ``[x, y]`` points.
        fz: number of initial groups passed to ``func03``.
    """
    groups = func03(kjz2w, fz)
    previous_means = None  # nothing to compare against on the first pass
    iterations = 0
    while True:
        groups, current_means = func06(groups)
        if current_means == previous_means:
            break
        previous_means = current_means.copy()
        iterations += 1
    print('迭代%d次' % (iterations))
    func02(groups)
if __name__ == '__main__':
    start = time.time()
    # NOTE(review): the original repeat count was lost in extraction; a
    # single run is assumed here.
    for _ in range(0, 1):
        kjz2w = func01()
        func07(kjz2w, 6)  # split into 6 groups (value taken from the original comment)
    # Elapsed minutes truncated to one decimal place.
    # NOTE(review): the constants 60/10/10 are reconstructed -- confirm.
    print('Time used:', int((time.time() - start) / 60 * 10) / 10, '分钟')
上图是初始均值选取得比较好时的情况,还有些情况是下面这样的。
今天还写了一种画蛇添足的算法,是从一维算法过渡来的,就像下面这样。
import time
import numpy as np
import random
import matplotlib.pyplot as plt
import operator
def func01(clusters=None, seed=1, show=True, pause=1.0):
    """Generate random 2-D integer points from rectangular clusters and plot them.

    NOTE(review): every numeric literal of the original listing (seed,
    cluster ranges, point counts, pause duration) was lost during
    extraction.  All defaults below are placeholders -- confirm them against
    the original article.

    Args:
        clusters: optional iterable of ``(x_min, x_max, y_min, y_max, count)``
            tuples with inclusive bounds; ``None`` selects six placeholder
            boxes.
        seed: value passed to ``random.seed``; this reseeds the module-level
            generator, as the original did.
        show: when true (the default) the points are plotted and the figure
            is shown for ``pause`` seconds.  Pass ``False`` to generate only.
        pause: seconds the figure stays on screen.

    Returns:
        A flat list of ``[x, y]`` integer pairs, cluster after cluster.
    """
    if clusters is None:
        # Placeholder layout: six separated boxes of 50 points each.
        clusters = (
            (0, 30, 0, 30, 50),
            (60, 90, 0, 30, 50),
            (120, 150, 0, 30, 50),
            (0, 30, 60, 90, 50),
            (60, 90, 60, 90, 50),
            (120, 150, 60, 90, 50),
        )
    random.seed(seed)
    points = []
    for x_min, x_max, y_min, y_max, count in clusters:
        points.extend(
            [random.randint(x_min, x_max), random.randint(y_min, y_max)]
            for _ in range(count)
        )

    if show:
        plt.xlabel('x-axis')
        plt.ylabel('y-axis')
        if points:
            # One plot call for all points instead of one call per point.
            plt.plot(
                [point[0] for point in points],
                [point[1] for point in points],
                color='b',
                marker='.',
                linestyle='None',
                label='y1 data',
            )
        plt.ion()
        plt.show()
        plt.pause(pause)
        plt.close()
    return points
def func02(kjz1, k, axis):
    """Split points into k groups along one axis with an iterative 1-D k-means.

    The points are sorted in place and cut into k equal slices.  The cut
    thresholds returned by ``means`` (midpoints between adjacent group
    means) are then used to re-partition the points until they stop
    changing.

    Fixes relative to the original listing:
      * the slice size used ``round(n / k)``, which can exceed ``n / k`` and
        leave trailing groups empty; floor division is used instead, so the
        remainder ``n % k`` added to the last slice is consistent;
      * the partition loop indexed the remaining-points list even after it
        had been emptied; filtering avoids that ``IndexError``.

    Args:
        kjz1: list of ``[x, y]`` points; sorted in place by ``axis``, ties
            broken by the other axis.
        k: number of groups; must be at least 1.
        axis: 0 to split along x, 1 to split along y.

    Returns:
        A list of k non-empty groups ordered along ``axis``, or ``[]``
        (after printing a message) when any group would be empty.

    Raises:
        ValueError: if ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be at least 1")

    # Equivalent to the original's two stable sorts (secondary key first).
    kjz1.sort(key=operator.itemgetter(axis, 1 - axis))

    base, _remainder = divmod(len(kjz1), k)
    if base == 0:
        # Fewer points than groups: some group would have to be empty.
        print('分组出现空组,返回[]')
        return []

    # Initial even split; the last slice absorbs the remainder.
    initial = [kjz1[i * base:(i + 1) * base] for i in range(k - 1)]
    initial.append(kjz1[(k - 1) * base:])
    thresholds = means(initial, axis)

    while True:
        remaining = kjz1.copy()
        groups = []
        for limit in thresholds:
            groups.append([p for p in remaining if p[axis] <= limit])
            remaining = [p for p in remaining if not p[axis] <= limit]
        groups.append(remaining)  # everything above the last threshold

        if any(len(group) == 0 for group in groups):
            print('分组出现空组,返回[]')
            return []

        updated = means(groups, axis)
        if updated == thresholds:
            # Stable thresholds: another pass would give the same groups.
            return groups
        thresholds = updated
def means(lb1, axis):
    """Return the midpoints between the means of adjacent groups along one axis.

    Args:
        lb1: list of non-empty groups, each a list of ``[x, y]`` points.
        axis: coordinate index (0 for x, 1 for y) whose means are used.

    Returns:
        A list with ``len(lb1) - 1`` floats: for each pair of neighbouring
        groups, the average of their two means along ``axis``.

    Raises:
        ValueError: if any group is empty.  The original failed later when
            indexing the NaN that ``np.mean`` returns for an empty list.
    """
    if any(len(group) == 0 for group in lb1):
        raise ValueError("means() requires every group to be non-empty")
    group_means = [np.mean(group, axis=0).tolist()[axis] for group in lb1]
    # Pair each group mean with its successor.
    return [
        (left + right) / 2
        for left, right in zip(group_means, group_means[1:])
    ]
def func03(kjz2w, pause=1.0):
    """Plot grouped 2-D points, one colour per group, then close the figure.

    Args:
        kjz2w: list of groups, each a list of ``[x, y]`` points.  Nothing is
            drawn when the list is empty.
        pause: seconds the figure stays on screen.
            NOTE(review): the original pause duration was lost in
            extraction; 1.0 is an assumed default.
    """
    if not kjz2w:
        return
    colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
    for index, group in enumerate(kjz2w):
        if not group:
            continue
        # One plot call per group instead of one per point.
        plt.plot(
            [point[0] for point in group],
            [point[1] for point in group],
            color=colors[index % len(colors)],  # colours repeat after seven groups
            marker='.',
            linestyle='None',
            label='y1 data',
        )
    plt.ion()
    plt.show()
    plt.pause(pause)
    plt.close()
def func04(kjz2wx, kjz2wy):
    """Combine an x-axis grouping and a y-axis grouping into 2-D cells.

    Each cell holds the points shared by one x-group and one y-group.  Cells
    are produced x-group by x-group and, within that, y-group by y-group;
    empty cells are dropped.

    Membership is tested against a set per y-group rather than scanning the
    y-group list for every point, which was quadratic in the original.
    Returns ``[]`` when either grouping is empty, where the original raised
    ``IndexError``.

    Args:
        kjz2wx: groups produced along the x axis (lists of ``[x, y]`` points).
        kjz2wy: groups produced along the y axis.

    Returns:
        A list of non-empty groups whose points come from ``kjz2wx``.
    """
    # Points are lists (unhashable); tuples give the same equality test.
    y_members = [{tuple(point) for point in group} for group in kjz2wy]
    cells = []
    for x_group in kjz2wx:
        for members in y_members:
            cell = [point for point in x_group if tuple(point) in members]
            if cell:
                cells.append(cell)
    return cells
def func05(lb2):
    """Remove every empty group from ``lb2`` in place.

    The original index-walking loop read ``lb2[0]`` unconditionally and so
    raised ``IndexError`` for an empty outer list; filtering handles that.

    Args:
        lb2: list of groups (sized containers).

    Returns:
        The same list object, now holding only the non-empty groups.
    """
    # Slice assignment keeps the caller's list object, as pop() did.
    lb2[:] = [group for group in lb2 if len(group) > 0]
    return lb2
def func06(kjz2wxy):
    """Perform one k-means step: compute group means, then regroup points.

    Empty input groups are skipped.  The original passed them to
    ``np.mean``, which returns NaN for an empty list and then failed when
    that value was indexed.

    Args:
        kjz2wxy: list of groups, each a list of ``[x, y]`` points.

    Returns:
        A tuple ``(groups, meanxy)``.  ``meanxy`` holds the ``[x, y]`` mean
        of each non-empty input group.  ``groups`` holds the points
        reassigned to their nearest mean, with empty groups removed, so it
        can be shorter than ``meanxy``.
    """
    populated = [group for group in kjz2wxy if len(group) > 0]
    meanxy = [np.mean(group, axis=0).tolist() for group in populated]

    def squared_distance(point, centre):
        return (point[0] - centre[0]) ** 2 + (point[1] - centre[1]) ** 2

    regrouped = [[] for _ in meanxy]
    for group in populated:
        for point in group:
            # min() keeps the first of equally near means, matching the
            # stable ascending sort used by the original.
            nearest = min(
                range(len(meanxy)),
                key=lambda index: squared_distance(point, meanxy[index]),
            )
            regrouped[nearest].append(point)

    # Drop groups that lost all their points in this step.
    regrouped = [group for group in regrouped if group]
    return regrouped, meanxy
def func07(kjz2w, x_groups=3, y_groups=2):
    """Cluster points by combining per-axis splits, then refining with k-means.

    Steps: split along x with ``func02``, split along y, intersect the two
    groupings with ``func04``, then apply ``func06`` until two consecutive
    steps report identical means.  Each intermediate grouping and the final
    result are plotted with ``func03``.

    NOTE(review): the group counts originally passed to ``func02`` were lost
    in extraction; the defaults 3 and 2 are assumptions -- confirm.

    Args:
        kjz2w: list of ``[x, y]`` points; ``func02`` sorts it in place.
        x_groups: number of groups for the x-axis split.
        y_groups: number of groups for the y-axis split.
    """
    groups_x = func02(kjz2w, x_groups, 0)  # axis 0 = x, axis 1 = y
    func03(groups_x)
    groups_y = func02(kjz2w, y_groups, 1)
    func03(groups_y)
    if not groups_x or not groups_y:
        # func02 returns [] when a split produced an empty group, leaving
        # nothing to intersect or refine.
        return

    groups = func04(groups_x, groups_y)
    func03(groups)

    previous_means = None  # nothing to compare against on the first pass
    iterations = 0
    while True:
        groups, current_means = func06(groups)
        if current_means == previous_means:
            break
        previous_means = current_means.copy()
        iterations += 1
    print('迭代%d次' % (iterations))
    func03(groups)
if __name__ == '__main__':
    start = time.time()
    kjz2w = func01()
    func07(kjz2w)
    # Elapsed minutes truncated to one decimal place.
    # NOTE(review): the constants 60/10/10 are reconstructed -- confirm.
    print('Time used:', int((time.time() - start) / 60 * 10) / 10, '分钟')