[0b32b6]: / python-scripts / runSimulationsCluster.py

Download this file

80 lines (73 with data), 4.2 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import numpy as np
from sklearn.cluster import KMeans
import numpy as np
from sklearn.metrics import silhouette_score, davies_bouldin_score
# Score K-means clusterings of the encoded factors written by each simulation
# method. For every (datatype, typenum, method) combination the factors are
# loaded from a text file, clustered for k = 2..15, and the silhouette score
# and Davies-Bouldin index averaged over 100 K-means fits are printed and
# written to a per-method report file next to the input.
datatypes = ["equal", "heterogeneous"]
typenums = [5, 10, 15]
data_names = [
    'VAE_FCTAE_EM', 'AE_FAETC_EM', 'AE_FCTAE_EM', 'DAE_FAETC_EM',
    'DAE_FCTAE_EM', 'SVAE_FCTAE_EM', 'MMDVAE_EM',
]

for datatype in datatypes:
    for typenum in typenums:
        # Inputs and reports of one (datatype, typenum) pair share a directory.
        result_dir = './result/simulations/{datatype}/{typenum}'.format(
            datatype=datatype, typenum=typenum)

        for data_name in data_names:
            encoded_factors = np.loadtxt(
                '{base}/{d}_{typenum}.txt'.format(
                    base=result_dir, d=data_name, typenum=typenum))
            savepath = '{base}/{d}_cluster_result.txt'.format(
                base=result_dir, d=data_name)

            with open(savepath, 'w') as report_file:
                # The same header goes to stdout and to the report file.
                header = 'method:{d}\n'.format(d=data_name)
                print(header)
                report_file.write(header)

                for cluster_num in range(2, 16):
                    silhouette_runs = []
                    dbi_runs = []
                    # KMeans is constructed without a random_state, and each k
                    # is fitted 100 times so the reported scores are averages
                    # over those fits.
                    for _ in range(100):
                        # Model training.
                        model = KMeans(n_clusters=cluster_num)
                        model.fit(encoded_factors)
                        run_labels = model.labels_
                        silhouette_runs.append(
                            silhouette_score(encoded_factors, run_labels,
                                             metric='euclidean'))
                        dbi_runs.append(
                            davies_bouldin_score(encoded_factors, run_labels))

                    # Build the per-k summary once; it is both printed and
                    # written after a 20-asterisk separator line.
                    summary = (
                        'k:{k}\nsilhouetteScore:{s}\ndavies_bouldinScore:{d}\n'
                    ).format(k=cluster_num,
                             s=np.mean(silhouette_runs),
                             d=np.mean(dbi_runs))
                    print(summary)
                    report_file.write('*' * 20 + '\n')
                    report_file.write(summary)
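# Direct concatenation baseline (disabled): the commented-out code below
# concatenates the three normalized omics matrices and clusters them directly.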
# files = ['aml', 'breast', 'colon', 'kidney', 'liver', 'lung', 'melanoma', 'ovarian', 'sarcoma','gbm']
# for f in files:
# datapath='./data/cancer_do_cluster/{f}'.format(f=f)
# omics1 = np.loadtxt('{}/log_exp_omics.txt'.format(datapath))
# omics1 = np.transpose(omics1)
# omics1 = normalize(omics1, axis=0, norm='max')
# print(omics1.shape)
# omics2 = np.loadtxt('{}/log_mirna_omics.txt'.format(datapath))
# omics2 = np.transpose(omics2)
# omics2 = normalize(omics2, axis=0, norm='max')
# print(omics2.shape)
# omics3 = np.loadtxt('{}/methy_omics.txt'.format(datapath))
# omics3 = np.transpose(omics3)
# omics3 = normalize(omics3, axis=0, norm='max')
# print(omics3.shape)
# omics = np.concatenate((omics1, omics2, omics3), axis=1)
# encoded_factors=omics
# savepath='./result/cancer_do_cluster/{f}/Contact_cluster_result.txt'.format(f=f)
# with open(savepath, 'w') as f2:
# print('cancer:{f}\nmethod:直接拼接'.format(f=f))
# f2.write('cancer:{f}\nmethod:直接拼接\n'.format(f=f))
# for typenum in range(2,7,1):
# all_silhouette=[]
# all_DBI=[]
# for i in range(100):
# clf = KMeans(n_clusters=typenum)
# clf.fit(encoded_factors) # 模型训练
# labels = clf.labels_
# silhouetteScore = silhouette_score(encoded_factors, labels, metric='euclidean')
# all_silhouette.append(silhouetteScore)
# davies_bouldinScore = davies_bouldin_score(encoded_factors, labels)
# all_DBI.append(davies_bouldinScore)
# avg_silhouette=np.mean(all_silhouette)
# avg_DBI=np.mean(all_DBI)
# # print("silhouetteScore:", avg_silhouette)
# # print("davies_bouldinScore:", avg_DBI)
# print('k:{k}\nsilhouetteScore:{s}\ndavies_bouldinScore:{d}\n'.format(k=typenum, s=avg_silhouette,d=avg_DBI))
# f2.write('zly'*20+'\n')
# f2.write('k:{k}\nsilhouetteScore:{s}\ndavies_bouldinScore:{d}\n'.format(k=typenum, s=avg_silhouette,d=avg_DBI))