# Source file: python-scripts/runCancerMMDVAE.py (text extracted from a diff viewer)
1
from keras.layers import Input, Dense
2
from keras.models import Model
3
import numpy as np
4
import pandas as pd
5
import matplotlib.pyplot as plt
6
from sklearn.cluster import KMeans
7
from sklearn.cluster import k_means
8
from sklearn.metrics import silhouette_score, davies_bouldin_score
9
from sklearn.preprocessing import normalize
10
import time
11
from sklearn import metrics
12
from myUtils import *
13
from ZVAEclass import ZVAE
14
import os
15
from keras import backend as K
16
17
def _load_omics(path):
    """Load one omics matrix from *path*, transpose it and max-normalise it.

    The text file is read with ``np.loadtxt`` and transposed, so the rows of
    the returned array are the columns of the file (presumably the file is
    features x samples and the result samples x features -- TODO confirm).
    Each column of the transposed matrix is then scaled with
    ``normalize(..., axis=0, norm='max')``.

    Args:
        path: Location of the whitespace-delimited numeric text file.

    Returns:
        The transposed, column-normalised 2-D array.
    """
    matrix = np.transpose(np.loadtxt(path))
    matrix = normalize(matrix, axis=0, norm='max')
    # Keep the original progress output: one shape line per loaded view.
    print(matrix.shape)
    return matrix


def get_EM(datapath, resultpath, epochs=100, batch_size=16,
           output_name='MMDVAE_EM_10.txt'):
    """Train a ZVAE on three omics views and save the encoder output.

    Reads ``log_exp_omics.txt``, ``log_mirna_omics.txt`` and
    ``methy_omics.txt`` from *datapath*, fits ``ZVAE(dims).autoencoder`` to
    reconstruct the three views, and writes ``encoder.predict`` for the same
    inputs to ``<resultpath>/<output_name>`` via ``np.savetxt``.

    Args:
        datapath: Directory holding the three omics text files.
        resultpath: Directory the output file is written to; it is created
            if it does not exist yet.
        epochs: Number of training epochs passed to ``fit``.
        batch_size: Mini-batch size passed to ``fit``.
        output_name: File name of the saved encoder output.
            NOTE(review): the ``_10`` suffix presumably tracks the latent
            size configured inside ``ZVAE`` -- confirm before changing it.

    Returns:
        None. The result is the file written under *resultpath*.
    """
    omics_files = ('log_exp_omics.txt', 'log_mirna_omics.txt', 'methy_omics.txt')
    omics = [_load_omics(os.path.join(datapath, name)) for name in omics_files]
    # One input width per view, in the same order as the data list.
    dims = [view.shape[1] for view in omics]

    # Fail early (before the training run) if the output location is unusable.
    os.makedirs(resultpath, exist_ok=True)

    try:
        vae = ZVAE(dims)
        vae.autoencoder.fit(omics, omics, epochs=epochs, verbose=1,
                            batch_size=batch_size, shuffle=True)
        encoded_factors = vae.encoder.predict(omics)
        np.savetxt(os.path.join(resultpath, output_name), encoded_factors)
    finally:
        # Reset Keras backend state even when training or saving fails, so a
        # later call in the same process does not inherit this model's state.
        K.clear_session()
53
54
55
if __name__ == '__main__':
    data_root = r"data/cancer"
    result_root = r"result/cancer"

    # Pair every sub-directory of the data root with a result directory of
    # the same name, creating the result directory when it is missing.
    jobs = []
    for entry in os.listdir(data_root):
        source_dir = os.path.join(data_root, entry)
        # Plain files directly under the data root are not datasets; skip them.
        if not os.path.isdir(source_dir):
            continue
        target_dir = os.path.join(result_root, entry)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
        jobs.append((source_dir, target_dir))

    # All result directories exist at this point; now run each dataset in turn.
    for source_dir, target_dir in jobs:
        get_EM(source_dir, target_dir)

    # To process a single dataset instead, call get_EM directly, e.g.
    # get_EM('data/cancer/liver', 'result/cancer/liver').
83
84
85
86
87
88
89
90
91