Switch to unified view

a b/python-scripts/runCancerCluster.py
1
import numpy as np
2
from sklearn.cluster import KMeans
3
import numpy as np
4
from sklearn.metrics import silhouette_score, davies_bouldin_score
5
from sklearn.preprocessing import normalize
6
7
8
# Script section: evaluate K-means clusterings of the encoded factors.
#
# For every cancer name in `files` and every method name in `data_names`, the
# encoded-factor matrix is loaded from ./result/cancer/<cancer>/<method>.txt,
# clustered with K-means for k = 2..6 (1000 independent fits per k), and the
# mean silhouette score and mean Davies-Bouldin index are printed and written
# to ./result/cancer_do_cluster/<cancer>/<method>_cluster_result.txt.
import os  # local to this section: needed to create the output directory

files = ['aml', 'breast', 'colon', 'kidney', 'liver', 'lung', 'melanoma', 'ovarian', 'sarcoma','gbm']
#files = ['liver', 'lung', 'melanoma', 'ovarian', 'sarcoma','gbm']
data_names = ['SVAE_FCTAE_EM','MMDVAE_EM']
#data_names = ['VAE_FCTAE_EM','AE_FAETC_EM', 'AE_FCTAE_EM', 'DAE_FAETC_EM', 'DAE_FCTAE_EM', 'LSTMVAE_FCTAE_EM']
for f in files:
    for data_name in data_names:
        # encoded_factors=np.loadtxt('./result/cancer_do_cluster/{f}/{d}.txt'.format(f=f, d=data_name))
        encoded_factors = np.loadtxt('./result/cancer/{f}/{d}.txt'.format(f=f, d=data_name))
        savepath = './result/cancer_do_cluster/{f}/{d}_cluster_result.txt'.format(f=f, d=data_name)
        # open(..., 'w') does not create missing parent directories, and the
        # output tree differs from the input tree, so create it on demand.
        # Done after loading so a missing input file leaves no empty folders.
        os.makedirs(os.path.dirname(savepath), exist_ok=True)

        header = 'cancer:{f}\nmethod:{d}'.format(f=f, d=data_name)
        with open(savepath, 'w') as f2:
            print(header)
            f2.write(header + '\n')
            for typenum in range(2, 7):
                all_silhouette = []
                all_DBI = []
                # K-means is fitted without a fixed seed, so each fit may give
                # a different labelling; the scores are averaged over the runs.
                for _ in range(1000):
                    clf = KMeans(n_clusters=typenum)
                    clf.fit(encoded_factors)  # train the model
                    labels = clf.labels_
                    all_silhouette.append(
                        silhouette_score(encoded_factors, labels, metric='euclidean'))
                    all_DBI.append(davies_bouldin_score(encoded_factors, labels))
                avg_silhouette = np.mean(all_silhouette)
                avg_DBI = np.mean(all_DBI)

                # Build the per-k report once; the same text goes to stdout and the file.
                summary = 'k:{k}\nsilhouetteScore:{s}\ndavies_bouldinScore:{d}\n'.format(
                    k=typenum, s=avg_silhouette, d=avg_DBI)
                print(summary)
                f2.write('zly'*20+'\n')  # separator line between the k sections
                f2.write(summary)
39
40
41
# Direct concatenation of the omics matrices (disabled variant, kept commented out)
42
# files = ['aml', 'breast', 'colon', 'kidney', 'liver', 'lung', 'melanoma', 'ovarian', 'sarcoma','gbm']
43
# for f in files:
44
#     datapath='./data/cancer_do_cluster/{f}'.format(f=f)
45
#     omics1 = np.loadtxt('{}/log_exp_omics.txt'.format(datapath))
46
#     omics1 = np.transpose(omics1)
47
#     omics1 = normalize(omics1, axis=0, norm='max')
48
#     print(omics1.shape)
49
#     omics2 = np.loadtxt('{}/log_mirna_omics.txt'.format(datapath))
50
#     omics2 = np.transpose(omics2)
51
#     omics2 = normalize(omics2, axis=0, norm='max')
52
#     print(omics2.shape)
53
#     omics3 = np.loadtxt('{}/methy_omics.txt'.format(datapath))
54
#     omics3 = np.transpose(omics3)
55
#     omics3 = normalize(omics3, axis=0, norm='max')
56
#     print(omics3.shape)
57
#     omics = np.concatenate((omics1, omics2, omics3), axis=1)
58
#     encoded_factors=omics
59
#     savepath='./result/cancer_do_cluster/{f}/Contact_cluster_result.txt'.format(f=f)
60
#     with open(savepath, 'w') as f2:
61
#         print('cancer:{f}\nmethod:直接拼接'.format(f=f))
62
#         f2.write('cancer:{f}\nmethod:直接拼接\n'.format(f=f))
63
#         for typenum in range(2,7,1):
64
#             all_silhouette=[]
65
#             all_DBI=[]
66
#             for i in range(100):
67
#                 clf = KMeans(n_clusters=typenum)
68
#                 clf.fit(encoded_factors)  # train the model
69
#                 labels = clf.labels_
70
#                 silhouetteScore = silhouette_score(encoded_factors, labels, metric='euclidean')
71
#                 all_silhouette.append(silhouetteScore)
72
#                 davies_bouldinScore = davies_bouldin_score(encoded_factors, labels)
73
#                 all_DBI.append(davies_bouldinScore)
74
#             avg_silhouette=np.mean(all_silhouette)
75
#             avg_DBI=np.mean(all_DBI)
76
77
#             # print("silhouetteScore:", avg_silhouette)
78
#             # print("davies_bouldinScore:", avg_DBI)
79
#             print('k:{k}\nsilhouetteScore:{s}\ndavies_bouldinScore:{d}\n'.format(k=typenum, s=avg_silhouette,d=avg_DBI))
80
#             f2.write('zly'*20+'\n')
81
#             f2.write('k:{k}\nsilhouetteScore:{s}\ndavies_bouldinScore:{d}\n'.format(k=typenum, s=avg_silhouette,d=avg_DBI))            
82