# python-scripts/runSimulationsDAE2.py
1
from keras.layers import Input, Dense
2
from keras.models import Model
3
import numpy as np
4
import pandas as pd
5
import matplotlib.pyplot as plt
6
from sklearn.cluster import KMeans
7
from sklearn.cluster import k_means
8
from sklearn.metrics import silhouette_score, davies_bouldin_score
9
from sklearn.preprocessing import normalize
10
import time
11
from sklearn import metrics
12
from myUtils import *
13
from DAEclass import DAE
14
import os
15
16
# Simulated datasets to process: one run per (datatype, typenum) pair.
DATATYPES = ("equal", "heterogeneous")
TYPENUMS = (5, 10, 15)

# Third positional argument given to every DAE. Presumably the strength of the
# input corruption -- confirm against DAEclass.DAE.
NOISE_FACTOR = 0.1

# First two entries of the ``dims`` list given to DAE, one pair per omics view
# (o1, o2, o3). The original script called them ``encoding1_dim``/``encoding2_dim``.
HIDDEN_DIMS = ((100, 50), (80, 50), (80, 50))

# Last entry of the ``dims`` list (``middle_dim`` in the original script),
# keyed by ``typenum``; each tuple is ordered by omics view (o1, o2, o3).
BOTTLENECK_DIMS = {
    5: (3, 1, 1),
    10: (4, 3, 3),
    15: (5, 5, 5),
}


def encoder_dims(view, typenum):
    """Return the three-entry ``dims`` list for one omics view.

    Args:
        view: Zero-based index of the omics view (0 -> o1, 1 -> o2, 2 -> o3).
        typenum: Value from ``TYPENUMS`` selecting the last entry.

    Returns:
        ``[encoding1_dim, encoding2_dim, middle_dim]`` as a new list.

    Raises:
        KeyError: If ``typenum`` has no entry in ``BOTTLENECK_DIMS``.
        IndexError: If ``view`` is outside the configured views.
    """
    first, second = HIDDEN_DIMS[view]
    return [first, second, BOTTLENECK_DIMS[typenum][view]]


def load_omics_view(datapath, index):
    """Load ``o<index>.txt`` from *datapath*, transpose it and normalize it.

    The matrix is transposed right after loading (presumably so that samples
    become rows -- confirm against the data files) and then passed through
    ``sklearn.preprocessing.normalize(axis=0, norm='max')``.
    """
    raw = np.loadtxt('{}/o{}.txt'.format(datapath, index))
    return normalize(np.transpose(raw), axis=0, norm='max')


def run_simulation(datatype, typenum):
    """Train one DAE per omics view and save the concatenated ``predict`` output.

    Reads ``data/simulations/<datatype>/<typenum>/o{1,2,3}.txt`` and writes
    ``result/simulations/<datatype>/<typenum>/DAE_FAETC_EM_<typenum>.txt``.

    Raises:
        KeyError: If ``typenum`` has no entry in ``BOTTLENECK_DIMS``.
    """
    datapath = 'data/simulations/{}/{}'.format(datatype, typenum)
    resultpath = 'result/simulations/{}/{}'.format(datatype, typenum)

    # Load every view before training so a missing input file fails fast.
    views = [load_omics_view(datapath, index) for index in (1, 2, 3)]

    encoded_per_view = []
    for view, data in enumerate(views):
        dae = DAE(data, encoder_dims(view, typenum), NOISE_FACTOR)
        dae.train()
        dae.autoencoder.summary()
        encoded_per_view.append(dae.predict(data))
    encoded_factors = np.concatenate(encoded_per_view, axis=1)

    # np.savetxt creates the output file itself; only the directory has to
    # exist beforehand.
    os.makedirs(resultpath, exist_ok=True)
    np.savetxt("{}/DAE_FAETC_EM_{}.txt".format(resultpath, typenum), encoded_factors)

    # ------------------------------------------------------------------
    # Disabled K-means evaluation of the embedding, kept for reference.
    # ------------------------------------------------------------------
    # groundtruth = np.loadtxt('{}/c.txt'.format(datapath))
    # groundtruth = list(np.int_(groundtruth))
    #
    # clf = KMeans(n_clusters=typenum)
    # t0 = time.time()
    # clf.fit(encoded_factors)  # fit the model
    # km_batch = time.time() - t0  # time spent fitting K-means
    #
    # print(datatype, typenum)
    # print("K-Means model training time: %.4fs" % km_batch)
    #
    # # Evaluation metrics
    # score_funcs = [
    #     metrics.adjusted_rand_score,  # ARI (adjusted Rand index)
    #     metrics.v_measure_score,  # weighted mean of homogeneity and completeness
    #     metrics.adjusted_mutual_info_score,  # AMI (adjusted mutual information)
    #     metrics.mutual_info_score,  # mutual information
    # ]
    #
    # centers = clf.cluster_centers_
    # labels = clf.labels_
    # print("labels:")
    # print(labels)
    # labels = list(np.int_(labels))
    # np.savetxt("{}/DAE_FAETC_CL.txt".format(resultpath), labels, fmt='%d')
    #
    # # Run every metric in turn
    # for score_func in score_funcs:
    #     t0 = time.time()
    #     km_scores = score_func(groundtruth, labels)
    #     print("K-Means: %s score: %.5f; elapsed: %0.3fs" % (
    #         score_func.__name__, km_scores, time.time() - t0))
    # t0 = time.time()
    # jaccard_score = jaccard_coefficient(groundtruth, labels)
    # print("K-Means: %s score: %.5f; elapsed: %0.3fs" % (
    #     jaccard_coefficient.__name__, jaccard_score, time.time() - t0))
    # silhouetteScore = silhouette_score(encoded_factors, labels, metric='euclidean')
    # davies_bouldinScore = davies_bouldin_score(encoded_factors, labels)
    # print("silhouetteScore:", silhouetteScore)
    # print("davies_bouldinScore:", davies_bouldinScore)


def main():
    """Run the DAE embedding for every configured simulated dataset."""
    for datatype in DATATYPES:
        for typenum in TYPENUMS:
            run_simulation(datatype, typenum)


if __name__ == '__main__':
    main()