# 输入data为矩阵,clusterNumbers为聚类的个数,该算法使用的是计量地理的思想。使用的测试数据如下:
# 0.0,0.375,0.483,1.749,1.516,0.375
# 0.0,0.0,0.776,1.596,1.336,1.743
# 0.0,0.0,0.0,1.926,1.662,2.154
# 0.0,0.0,0.0,0.0,0.501,0.693
# 0.0,0.0,0.0,0.0,0.0,0.589
# 0.0,0.0,0.0,0.0,0.0,0.0
#order1:获取的最小值行列号
#isexist:判断的聚类结果里面是否包含新的类,如果包括,将新类加入进去
#newdata:如果新类有两个元素,并且这两个元素属于两个类,把这两个类里面的数据放到newdata里面,最后合并为一类
#Flag_DeleteRowAndCol:在最后删除已分类的行列用到
#Flag_DeleteName:在最后删除行列名
#Flag_DeleteNumber:一共删除多少行(列)
#counDIM:新加的行列名称
#calcdis:用于计算所有类与当前值的距离
#leaveMatrixDim:用于循环的数据矩阵是否需要删除最后一列,因为最后一列始终为所有类别与剩余元素的最小值,并且只有一列
#' Nearest-neighbour agglomerative clustering on a triangular distance matrix.
#'
#' Each pass takes the smallest non-zero cell of the working distance matrix,
#' merges the groups that cell connects, removes their rows/columns and
#' appends one new row/column that holds, for every remaining group, the
#' smallest non-zero original distance to the merged group.
#'
#' Progress is written to the console with `cat()` after every pass.
#'
#' @param data Square numeric matrix or data frame of pairwise distances whose
#'   row names are the numbers 1..n (member labels are used directly as row
#'   and column positions). Zero cells are ignored, so only one triangle has
#'   to be filled; each pair is read in both orientations and the larger cell
#'   is used.
#' @param clusterNumbers Requested number of clusters. Merging stops when
#'   exactly this many result rows are in use and the working matrix has at
#'   most `nrow(data) / 20` rows, or when no group is left to merge.
#' @return An n x n numeric matrix. Each leading non-zero row lists the member
#'   labels of one cluster, padded with zeros; the row name of a used row is
#'   the internal label (> n) most recently assigned to that cluster.
cluster <- function(data, clusterNumbers) {
  # Highest row index of `m` that holds a non-zero cell; 0 when `m` is all zero.
  last_used_row <- function(m) {
    rows <- which(m != 0, arr.ind = TRUE)[, 1]
    if (length(rows) > 0) max(rows) else 0
  }

  # Distance between original elements `a` and `b`, whichever triangle of the
  # input happens to hold it.
  pair_dist <- function(a, b) {
    max(ori[a, b], ori[b, a])
  }

  # Smallest non-zero distance between any element of `members` and any
  # element of `others`. Returns c(distance, element of others, element of
  # members) for the pair that produced it.
  closest_pair <- function(members, others) {
    members <- as.numeric(members)
    others <- as.numeric(others)
    best <- pair_dist(members[1], others[1])
    best_other <- others[1]
    best_member <- members[1]
    for (m in members) {
      for (o in others) {
        d <- pair_dist(m, o)
        # A stored zero means "no distance yet", so any candidate replaces it;
        # otherwise only a strictly smaller non-zero distance wins.
        if (best == 0 || (d != 0 && best > d)) {
          best <- d
          best_other <- o
          best_member <- m
        }
      }
    }
    c(best, best_other, best_member)
  }

  name <- rownames(data)            # labels of elements not yet clustered
  ori <- data                       # untouched copy used for distance lookups
  n_ori <- nrow(ori)
  max_label <- as.numeric(rownames(ori)[n_ori])
  counDIM <- n_ori + 1              # label given to the next merged group

  # One row per cluster, members left-aligned, zero padded.
  clusterresult <- matrix(0, n_ori, n_ori)
  rownames(clusterresult) <- seq_len(n_ori)
  colnames(clusterresult) <- seq_len(n_ori)

  # For every merged group a block of three rows: distance to each remaining
  # group, and the two original elements that realise that distance.
  newcluster <- matrix(0, 10 * n_ori, n_ori)
  rownames(newcluster) <- seq_len(10 * n_ori)
  colnames(newcluster) <- seq_len(n_ori)
  finalDIMofData <- n_ori + 1       # first row of the next block in newcluster

  m_iswhile <- TRUE
  while (m_iswhile) {
    minValue <- min(data[data != 0])
    getrowandcol <- which(data == minValue, arr.ind = TRUE)
    Flag_DeleteRowAndCol <- c(getrowandcol)   # positions to drop from `data`
    Flag_DeleteName <- as.numeric(rownames(data)[Flag_DeleteRowAndCol])
    order1 <- unique(Flag_DeleteName)         # labels joined by this pass
    leaveMatrixDim <- nrow(data)

    # If a merged group takes part, translate its label back to the original
    # elements recorded for this distance.
    # NOTE(review): the lookup matches newcluster cells by exact value, so it
    # can also hit stale blocks or stored element numbers equal to minValue.
    if (any(order1 > max_label)) {
      calcrc <- which(newcluster == minValue, arr.ind = TRUE)
      for (i in seq_len(nrow(calcrc))) {
        calcrow <- calcrc[i, 1]
        calccol <- calcrc[i, 2]
        order1 <- cbind(
          order1,
          newcluster[c(calcrow + 1, calcrow + 2), calccol]
        )
      }
      order1 <- c(order1)
      order1 <- unique(order1[order1 <= max_label])
    }

    # Which existing clusters already contain one of these elements?
    hits <- lapply(
      order1,
      function(v) which(clusterresult == v, arr.ind = TRUE)
    )
    isexist <- do.call(rbind, hits)
    if (nrow(isexist) > 0) {
      isexist <- unique(isexist[, 1])
    }

    if (length(isexist) > 1) {
      # Several clusters are touched: fuse them into one row.
      newdata <- unique(c(c(clusterresult[isexist, ]), order1))
      newdata <- newdata[newdata > 0]
      clusterresult <- clusterresult[-isexist, , drop = FALSE]
      # Pad with empty rows so used rows stay contiguous at the top.
      for (i in seq_along(isexist)) {
        clusterresult <- rbind(clusterresult, rep(0, n_ori))
      }
      irow <- last_used_row(clusterresult) + 1
      clusterresult[irow, seq_along(newdata)] <- newdata
      rownames(clusterresult)[irow] <- counDIM
    } else if (length(isexist) == 1) {
      # Exactly one cluster is touched: append only the members it lacks, so
      # the row can never grow past the last column.
      irow <- isexist[1]
      used <- which(clusterresult[irow, ] != 0)
      fresh <- setdiff(order1, clusterresult[irow, used])
      if (length(fresh) > 0) {
        clusterresult[irow, max(used) + seq_along(fresh)] <- fresh
      }
      rownames(clusterresult)[irow] <- counDIM
    } else {
      # No cluster is touched: open a new one in the first free row.
      irow <- last_used_row(clusterresult) + 1
      clusterresult[irow, seq_along(order1)] <- order1
      rownames(clusterresult)[irow] <- counDIM
    }

    # Forget the labels that were just merged and drop their rows/columns.
    # drop = FALSE keeps `data` two-dimensional when a single group remains.
    name <- name[!(name %in% as.character(Flag_DeleteName))]
    data <- data[-Flag_DeleteRowAndCol, -Flag_DeleteRowAndCol, drop = FALSE]

    # A merged group that was itself merged no longer needs its block.
    for (i in seq_along(Flag_DeleteName)) {
      if (Flag_DeleteName[i] > n_ori) {
        calcrow <- n_ori + 3 * (Flag_DeleteName[i] - n_ori - 1) + 1
        newcluster[c(calcrow, calcrow + 1, calcrow + 2), ] <- 0
      }
    }

    # De-duplicate and left-align every used cluster row.
    for (i in seq_len(last_used_row(clusterresult))) {
      a <- unique(clusterresult[i, clusterresult[i, ] > 0])
      clusterresult[i, ] <- 0
      clusterresult[i, seq_along(a)] <- a
    }

    # Continue while groups remain and either the cluster count is off target
    # or the working matrix is still larger than 5% of the input.
    keep_going <- nrow(data) > 0 &&
      (last_used_row(clusterresult) != clusterNumbers ||
         nrow(data) > (n_ori / 20))

    if (keep_going) {
      leavename <- rownames(data)
      new_row <- which(rownames(clusterresult) == counDIM)[1]
      for (i in seq_along(leavename)) {
        calcdis <- clusterresult[new_row, which(clusterresult[new_row, ] != 0)]
        if (as.numeric(leavename[i]) > n_ori) {
          # The remaining group is itself a cluster: compare all its members.
          getculsterrow <- which(
            rownames(clusterresult) == as.numeric(leavename[i])
          )[1]
          others <- clusterresult[
            getculsterrow,
            which(clusterresult[getculsterrow, ] != 0)
          ]
        } else {
          others <- as.numeric(leavename[i])
        }
        newcluster[finalDIMofData + 0:2, i] <- closest_pair(calcdis, others)
      }

      # Append the new group's distances as the last column, plus a zero row.
      add <- newcluster[finalDIMofData, ]
      add <- add[add > 0]
      data <- cbind(data, t(t(add)))
      data <- rbind(data, c(rep(0, (nrow(data) + 1))))
      rownames(data)[nrow(data)] <- counDIM
      colnames(data)[nrow(data)] <- counDIM
      counDIM <- counDIM + 1
      # Later passes identify rows by name, so restore the singleton labels.
      rownames(data)[seq_along(name)] <- name
      colnames(data)[seq_along(name)] <- name
      finalDIMofData <- finalDIMofData + 3
    } else {
      m_iswhile <- FALSE
    }

    cat("待聚类元素个数:", leaveMatrixDim, "\n")
    cat("newcluster中共有数据行:", last_used_row(newcluster), "\n")
    cat("已完成聚类个数:", last_used_row(clusterresult), "\n")
  }

  clusterresult
}
# Ad-hoc driver for cluster(). It relies on objects that are not created in
# this file (`C`, `AREA_COST_Order`) and on a local data file.

# NOTE(review): `C` is not defined here - presumably a distance matrix left in
# the workspace; confirm before running.
cluster(C, 1)

# Build an upper-triangular input from AREA_COST_Order: blank the lower
# triangle and the diagonal, then treat cells equal to 1 as "no distance".
b <- AREA_COST_Order
b[lower.tri(b, diag = TRUE)] <- 0
b[b == 1] <- 0
# cluster() identifies elements by numeric row/column names.
rownames(b) <- seq_len(nrow(b))
colnames(b) <- seq_len(nrow(b))
diag(b) <- 0
result_cluster <- cluster(b, 4)

# Globals mirroring the arguments of cluster(); these look like leftovers for
# stepping through the function body by hand.
data <- as.data.frame(b)
clusterNumbers <- 4

# Run on the sample table stored on disk (comma separated, no header row).
b <- read.csv(
  "K:\\本科\\学习\\C++\\程序\\聚类分析\\ClusterAnalysis\\Table 7-5.txt",
  sep = ",",
  header = FALSE
)
cluster(b, 1)

# Number of cluster rows in use in the AREA_COST_Order result.
(max(which(result_cluster != 0, arr.ind = TRUE)[, 1]))