大家好,欢迎来到IT知识分享网。
问题
- 根据下面表格中的数据,用Matlab(或Python)编程进行数据标准化处理;
- 根据标准化处理后的数据,用Matlab(或Python)编程,建立模糊相似矩阵,并编程求出其传递闭包矩阵;
- 根据模糊等价矩阵,编程绘制动态聚类图;
- 根据原始数据,编程确定最佳分类结果。
原始数据如下:
no Y1 Y2 Y3 Y4 Y5 Y6
x1 21 63 19 40 1.567 106
x2 23 74 30 75 2.693 54
x3 119 179 86 118 6.897 9
x4 115 168 49 89 2.637 29
x5 79 146 46 92 2.356 24
x6 79 158 48 103 2.142 7
x7 65 114 58 99 2.679 7
x8 68 119 58 96 3.099 6
x9 109 166 59 95 2.868 6
x10 118 177 56 89 2.64 7
代码如下
# -*- coding: utf-8 -*-
import numpy as np
import pprint
import xlrd
from copy import deepcopy
import math
from scipy.cluster import hierarchy #用于进行层次聚类,话层次聚类图的工具包
from scipy import cluster
import matplotlib.pyplot as plt
import pandas as pd
#矩阵不用科学计数法的显示方式
np.set_printoptions(suppress=True)
# 1.数据规范化:采用最大值规格法
def process_matrix(ori_matrix):
x = deepcopy(ori_matrix)
#获得特征指标矩阵行列数
x_rows = x.shape[0]#10,行数
x_cols = x.shape[1]#6,列数
#参数0代表对每一列求均值,参数1代表对每一行求均值,无参数则求所有元素的均值
max_matrix = np.max(x,axis = 0)
for i in range(x_rows):
for j in range(x_cols):
x[i][j] = (x[i][j])/max_matrix[j]
return x
# 2.构造模糊相似矩阵:采用最大最小法构造模糊相似矩阵
def get_rmatrix(Ori_matrix):
# Fuzzy_Similarity_Matrix : 模糊相似矩阵
Fuzzy_Similarity_Matrix = np.zeros((Ori_matrix.shape[0],Ori_matrix.shape[0]),dtype = 'float')
for i in range(Ori_matrix.shape[0]):
for j in range(Ori_matrix.shape[0]):
max_sum = 0
min_sum = 0
for k in range(Ori_matrix.shape[1]):
max_sum += max(Ori_matrix[i][k], Ori_matrix[j][k])
min_sum += min(Ori_matrix[i][k], Ori_matrix[j][k])
Fuzzy_Similarity_Matrix[i][j] = min_sum/max_sum
return Fuzzy_Similarity_Matrix
# 3.求传递闭包
def get_tR(r_matrix):
def t_r(mat):#传递闭包运算
rows = mat.shape[0]
cols = mat.shape[1]
min_list = []
new_mat = np.zeros((rows,cols),dtype = 'float')
for m in range(rows):
for n in range(cols):
min_list = []
now_row = mat[m]
now_col = mat[:,n]
for k in range(len(now_row)):
min_cell = min(mat[m][k],mat[:,n][k])
min_list.append(min_cell)
new_mat[m][n] = max(min_list)
return new_mat
#传递闭包判断是否停止运算
i = 0
while True:
t_r_matrix = t_r(r_matrix)
print("==================模糊相似矩阵R^%d================"%(2**i))
print(t_r_matrix)
#若get_tR(r_matrix) == t_r_matrix ,退出循环
if (t_r_matrix == r_matrix).all():
print("******** R^%d = R^%d ,传递闭包过程结束。******** "%(2**(i-1),2**i))
break
#否则,将t_r_matrix作为待求闭包矩阵
else:
r_matrix = t_r_matrix
i += 1
return t_r_matrix
# 4.按lambda截集进行动态聚类
def lambda_clustering(final_matrix):
rows = final_matrix.shape[0]
cols = final_matrix.shape[1]
lambda_list = list(set([i for j in final_matrix.tolist() for i in j]))#将numpy数组转化为一维列表并去重得到截集
lambda_list.sort(reverse=True)
result = [] #返回的结果
for i in range(len(lambda_list)):
class_list = [] #分类情况
temp_infor = {
'matrix':np.zeros((rows,cols),dtype = 'float'),'lambda':lambda_list[i],'class':[]} #每个lambda值的分类情况
for m in range(rows):#计算大于等于截集值的置1,小于截集值置0
for n in range(cols):
if final_matrix[m][n] >= lambda_list[i]:
temp_infor['matrix'][m][n] = 1
else:
temp_infor['matrix'][m][n] = 0
for m in range(rows):#首先遍历每一行,设这一行为m,与下一行即下一个实例比较
if (m+1) in [i for item in class_list for i in item]:#如果 m+1 行的索引号在 已经分好类的所有实例(class_list) 中就跳出循环,即 m+1 行的那个实例已经被分类完了
continue
now_class = []#如果没跳出循环说明这个m+1这个实例还没被分类,now_class用来存储 第m+1 个实例和其后面相同类别的实例
now_class.append(m+1)
for n in range(m+1,rows):#对于m+1及其往后的实例,若有相等归为一类向now_class添加元素
if (temp_infor['matrix'][m] == temp_infor['matrix'][n]).all() :
now_class.append(n+1)
class_list.append(now_class)
temp_infor['class'] = class_list
result.append(temp_infor)
return result
# 5.利用 F - 统计量寻找最优分类
#传入参数为result动态聚类结果字段有class(分类情况),lamdba(截集取值),matrix(0-1矩阵)standard_matrix标准化后的矩阵
def F_Statistics(results, standard_matrix):
#定义变量F 为F统计量数值 #定义class_class变量为类与类之间的距离 F的分子# 定义class_sample变量为类内样品间的距离 F的分母
for result in results:
mean_all = np.mean(standard_matrix, axis=0)#计算矩阵所有样品属性的平均值
sorts = result.get('class')
class_class = 0
class_sample = 0
for sort in sorts:#对于每个分类
sort = [i-1 for i in sort]#将分类转化 [4, 9, 10]-->[3, 8, 9]因为矩阵下标从零开始
# print(standard_matrix[sort])
mean_j = np.mean(standard_matrix[sort], axis=0)#计算矩阵第j类样品的平均值
xj_av_reduce_xall_av = (np.sum((np.square(mean_j - mean_all)), axis=0))**(1/2)#计算xj平均值减去x的平均值,对所有属性
class_class += (len(sort) * xj_av_reduce_xall_av**2)
#[nan, 138.72123087068982, 91.13931639141123, 33.60939825599838, 37.53752831809482, 18.76906273985417, 12.97829257460103, 7.850374240762219, 13.852860873165161, inf]
#计算类class_sample 为类内样品间的距离
sum_xij_reduce_xj_av = 0#用来储存这个类中所有样品的xij_reduce_xj_av值
for s in sort:#求每一个样品的xij_reduce_xj_av,对所有属性
xij_reduce_xj_av = (np.sum(np.square((standard_matrix[s] - mean_j)), axis=0))**(1/2)#计算x(i,j)减去xj的平均值,对所有属性
sum_xij_reduce_xj_av += (xij_reduce_xj_av**2)
class_sample += sum_xij_reduce_xj_av
F = (class_class / (len(sorts) - 1)) / (class_sample / (standard_matrix.shape[0] - len(sorts)))
result['F'] = F
print("******************** 对于上面抛出警告 RuntimeWarning: divide by zero encountered in double_scalars 忽略 ************************")
F_np = np.array([result.get('F') for result in results])
where_are_inf = np.isinf(F_np)#无穷大的值组成的布尔列表
where_not_inf = (1-where_are_inf).astype(np.bool)#对上面取反,得到不是无穷大的正常值
F_np[where_are_inf] = np.mean(F_np[where_not_inf])#将无穷大值的地方置为平均值
where_are_nan = np.isnan(F_np)#nan的值组成的布尔列表
where_not_nan = (1-where_are_nan).astype(np.bool)#对上面取反,得到不是nan的正常值
F_np[where_are_nan] = np.mean(F_np[where_not_nan])#将nan值的地方置为平均值,原谅我不会布尔矩阵取或运算
print("***********最优分类结果如下***************")
return np.argmax(F_np)#返回F取最大时候的下标
def main():
# 0.读取数据
path = 'data.xlsx'
df = pd.read_excel(path, index_col=0)
ori_matrix = df.values
# 1.数据规范化:采用最大值规格法规范化原矩阵
standard_matrix = process_matrix(ori_matrix)
# 2.构造模糊相似矩阵:采用最大最小法构造模糊相似矩阵
r_matrix = get_rmatrix(standard_matrix)
# 将模糊相似矩阵取两位小数,否则后面的与lambda值判断大小时会出错
for i in range(r_matrix.shape[0]) :
for j in range(r_matrix.shape[1]) :
r_matrix[i][j] = round(r_matrix[i][j],2)
print('==================特征指标矩阵================')
print(ori_matrix)
print('===============标准化特征指标矩阵=============')
print(standard_matrix)
print('==================模糊相似矩阵R================')
print(r_matrix)
# 3.求传递闭包t(R)
t_r_matrix = get_tR(r_matrix)
Z = hierarchy.linkage(t_r_matrix, method ='ward', metric='euclidean')
hierarchy.dendrogram(Z,labels = df.index)
label = cluster.hierarchy.cut_tree(Z,height=1)
label = label.reshape(label.size,)
plt.show()
# 4.按lambda截集进行动态聚类
result = lambda_clustering(t_r_matrix)
print("************动态聚类结果为****************")
for x in result:
pprint.pprint(x)
# 5.利用 F - 统计量寻找最优分类
pprint.pprint(result[F_Statistics(result,standard_matrix)])
if __name__ == '__main__':
main()
参考
https://blog.csdn.net/CodertypeA/article/details/86483175
https://blog.csdn.net/m0_37804518/article/details/78806138
———————————————————–matlab———————————————————————-
matlab版模糊聚类分析
Main.m
x=[37 38 12 16 13 12;
69 73 74 22 64 17;
73 86 49 27 68 39;
57 58 64 84 63 28;
38 56 65 85 62 27;
65 55 64 15 26 48;
65 56 15 42 65 35 ;
66 45 65 55 34 32];
x_zuida = [];
x_pingyi = [];
R_zuidazuixiao = [];
R_suanshu = [];
[x_zuida,x2] = bzh(x);
[R_zuidazuixiao,R_suanshu] = bd(x_zuida);
tR_zuidazuixiao = chuandi(R_zuidazuixiao)
tR_suanshu = chuandi(R_suanshu)
juleitu(tR_zuidazuixiao)
julei(tR_zuidazuixiao)
bd.m
function [R1,R2] = bd(x)
%函数功能:标定
[m,n] = size(x);
for i = 1:m
for j = 1:m
for k = 1:n
qx(k) = min(x(i,k),x(j,k)); %取小
qd(k) = max(x(i,k),x(j,k)); %取大
end
R1(i,j) = sum(qx)/sum(qd); %最大最小法
R2(i,j) = 2*sum(qx)/(sum(x(i,:))+sum(x(j,:))); %算术平均最小法
if i == j
R1(i,j) = 1;
R2(i,j) = 1;
end
end
end
R_zuidazuixiao = R1
R_suanshu = R2
bzh.m
function [x_zuida, x_pingyi] = bzh(x)
%函数功能:标准化矩阵
[m,n] = size(x);
B = max(x);
B1 = max(x) - min(x);
Bm = min(x);
for i = 1:n
x1(:,i) = x(:,i)/B(i); %最大值规格化
x2(:,i) = (x(:,i) - Bm(i))/B1(i); %平移极差标准化
end
x_zuida = x1
x_pingyi = x2
chuandi.m
function [tr] = chuandi(x)
%函数功能:求传递闭包
R = x;
a=size(R);
B=zeros(a);
flag=0;
while flag==0
for i= 1: a
for j= 1: a
for k=1:a
B( i , j ) = max(min( R( i , k) , R( k, j) ) , B( i , j ) ) ;%R与R内积,先取小再取大
end
end
end
if B==R
flag=1;
else
R=B;%循环计算R传递闭包
end
end
tr = B;
julei.m
function [M,N]=julei(tR1)
%函数功能:求出lamda截矩阵
tR = tR1;
lamda=unique(tR); %取A矩阵不同元素构成的向量,来确定阈值
L=length(lamda);
lamda = sort(lamda,'descend');
for i = 1:L
tR = tR1;
lamda(i)
tR(find(tR>=lamda(i))) = 1; %令大于lamda的为1
tR(find(tR<lamda(i))) = 0; %令小于lamda的为0
tR
end
juleitu.m
function [M,N]=juleitu(tR)
%函数功能:画动态聚类图
lamda=unique(tR);%取A矩阵不同元素构成的向量,来确定阈值
L=length(lamda);
M=1:L;
for i=L-1:-1:1 %获得分类情况:对元素分类进行排序
[m,n]=find(tR==lamda(i));
N{
i,1}=n;
N{
i,2}=m;
tR(m(1),:)=0;
mm=unique(m);
N{
i,3}=mm;
len=length(find(m==mm(1)));
depth=length(find(m==mm(2)));
index1=find(M==mm(1));
MM=[M(1:index1-1),M(index1+depth:L)];
index2=find(MM==mm(2));
M=M(index1:index1+depth-1);
M=[MM(1:index2-1),M,MM(index2:end)];
end
M=[1:L;M;ones(1,L)];
h=(max(lamda)-min(lamda))/L;
figure
text(L,1,sprintf('x%d',M(2,L)));
text(0,1,sprintf('%3.4f',1));
text(0,(1+min(lamda))/2,sprintf('%3.4f',(1+min(lamda))/2));
text(0,min(lamda),sprintf('%3.4f',min(lamda)));
hold on
for i=L-1:-1:1 %获得分类情况:每一个子类的元素
m=N{
i,2};
n=N{
i,1};
mm=N{
i,3};
k=find(M(2,:)==mm(1));
l=find(M(2,:)==mm(2));
x1=M(1,k);
y1=M(3,k);
x2=M(1,l);
y2=M(3,l);
x=[x1,x1,x2,x2];
M(3,[k,l])=lamda(i);
M(1,[k,l])=sum(M(1,[k,l]))/length(M(1,[k,l]));
y=[y1,lamda(i),lamda(i),y2];
plot(x,y);
text(i,1,sprintf('x%d',M(2,i)));
text(M(1,k(1)),lamda(i)+h*0.1,sprintf('%3.4f',lamda(i)));
end
axis([0 L+1 min(lamda) max(lamda)])
axis off
hold off
end
将main函数里的矩阵改一下就能用
免责声明:本站所有文章内容,图片,视频等均是来源于用户投稿和互联网及文摘转载整编而成,不代表本站观点,不承担相关法律责任。其著作权各归其原作者或其出版社所有。如发现本站有涉嫌抄袭侵权/违法违规的内容,侵犯到您的权益,请在线联系站长,一经查实,本站将立刻删除。 本文来自网络,若有侵权,请联系删除,如若转载,请注明出处:https://yundeesoft.com/21661.html