diff --git a/python-scripts/runSimulationsMMDVAE.py b/python-scripts/runSimulationsMMDVAE.py
from keras.layers import Input, Dense
from keras.models import Model
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.cluster import k_means
from sklearn.metrics import silhouette_score, davies_bouldin_score
from sklearn.preprocessing import normalize
import time
from sklearn import metrics
from myUtils import *
from ZVAEclass import ZVAE
import os

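# Driver script: for each simulated data setting ("equal" / "heterogeneous")
# and each number of clusters (5, 10, 15), train the MMD-VAE on three omics
# matrices and write the learned embedding to the matching result folder.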
if __name__ == '__main__':
    datatypes = ["equal", "heterogeneous"]
    typenums = [5, 10, 15]
    for datatype in datatypes:
        for typenum in typenums:
            datapath = 'data/simulations/{}/{}'.format(datatype, typenum)
            resultpath = 'result/simulations/{}/{}'.format(datatype, typenum)
            groundtruth = np.loadtxt('{}/c.txt'.format(datapath))  # ground-truth cluster assignments
            groundtruth = list(np.int_(groundtruth))

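            # Load each omics matrix (the files appear to be stored as
            # features x samples), transpose to samples x features, and
            # max-normalize every feature column.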
            omics1 = np.loadtxt('{}/o1.txt'.format(datapath))
            omics1 = np.transpose(omics1)
            omics1 = normalize(omics1, axis=0, norm='max')
            dim1 = omics1.shape[1]
            print(omics1.shape)
            omics2 = np.loadtxt('{}/o2.txt'.format(datapath))
            omics2 = np.transpose(omics2)
            omics2 = normalize(omics2, axis=0, norm='max')
            dim2 = omics2.shape[1]
            print(omics2.shape)
            omics3 = np.loadtxt('{}/o3.txt'.format(datapath))
            omics3 = np.transpose(omics3)
            omics3 = normalize(omics3, axis=0, norm='max')
            dim3 = omics3.shape[1]
            print(omics3.shape)
            # omics = np.concatenate((omics1, omics2, omics3), axis=1)
            dims = [dim1, dim2, dim3]
            omics = [omics1, omics2, omics3]

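            # The commented block below looks like a leftover configuration
            # from an earlier single-matrix (concatenated) autoencoder setup.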
            # data = omics
            # # input_dim = data.shape[1]
            # encoding1_dim = 300
            # encoding2_dim = 100
            # middle_dim = 15
            # dims = [encoding1_dim, encoding2_dim, middle_dim]
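            # ZVAE (from ZVAEclass) is assumed to build a multi-input,
            # multi-output MMD variational autoencoder with one branch per
            # omics view and a shared latent space; `dims` lists the feature
            # dimensionality of each view, and `vae.encoder` exposes the
            # latent representation.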
            vae = ZVAE(dims)
            vae.autoencoder.summary()
            vae.autoencoder.fit(omics, omics, epochs=100, verbose=1, batch_size=16, shuffle=True)
            encoded_factors = vae.encoder.predict(omics)
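            # Save the shared latent embedding for this run; the "_15" suffix
            # presumably refers to the latent dimensionality used here.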
            # if not os.path.exists("{}/MMDVAE_EM.txt".format(resultpath)):
            #     os.mknod("{}/MMDVAE_EM.txt".format(resultpath))
            np.savetxt("{}/MMDVAE_EM_15.txt".format(resultpath), encoded_factors)

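            # NOTE: the evaluation below is left commented out; it would
            # cluster the embedding with k-means and score it against the
            # ground truth (ARI, V-measure, AMI, mutual information, Jaccard,
            # silhouette, Davies-Bouldin).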
            # if not os.path.exists("AE_FCTAE_Kmeans.txt"):
            #     os.mknod("AE_FCTAE_Kmeans.txt")
            # fo = open("AE_FCTAE_Kmeans.txt", "a")
            # clf = KMeans(n_clusters=typenum)
            # t0 = time.time()
            # clf.fit(encoded_factors)  # train the k-means model
            # km_batch = time.time() - t0  # time taken to fit k-means

            # print(datatype, typenum)
            # print("K-Means training time: %.4fs" % km_batch)

            # # Evaluation against the ground-truth labels
            # score_funcs = [
            #     metrics.adjusted_rand_score,  # ARI (Adjusted Rand Index)
            #     metrics.v_measure_score,  # weighted average of homogeneity and completeness
            #     metrics.adjusted_mutual_info_score,  # AMI (Adjusted Mutual Information)
            #     metrics.mutual_info_score,  # mutual information
            # ]
            # centers = clf.cluster_centers_
            # #print("centers:")
            # #print(centers)
            # print("zlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzly")
            # labels = clf.labels_
            # print("labels:")
            # print(labels)
            # labels = list(np.int_(labels))
            # if not os.path.exists("{}/VAE_FCTAE_CL.txt".format(resultpath)):
            #     os.mknod("{}/VAE_FCTAE_CL.txt".format(resultpath))
            # np.savetxt("{}/VAE_FCTAE_CL.txt".format(resultpath), labels, fmt='%d')
            # print("zlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzly")
            # # 2. Evaluate the clustering with each scoring function in turn
            # for score_func in score_funcs:
            #     t0 = time.time()
            #     km_scores = score_func(groundtruth, labels)
            #     print("K-Means %s score: %.5f; time: %0.3fs" % (score_func.__name__, km_scores, time.time() - t0))
            # t0 = time.time()
            # jaccard_score = jaccard_coefficient(groundtruth, labels)
            # print("K-Means %s score: %.5f; time: %0.3fs" % (
            #     jaccard_coefficient.__name__, jaccard_score, time.time() - t0))
            # silhouetteScore = silhouette_score(encoded_factors, labels, metric='euclidean')
            # davies_bouldinScore = davies_bouldin_score(encoded_factors, labels)
            # print("silhouetteScore:", silhouetteScore)
            # print("davies_bouldinScore:", davies_bouldinScore)
            # print("zlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzlyzly")