Switch to unified view

a b/python-scripts/runCancerSVAE.py
1
from keras.layers import Input, Dense
2
from keras.models import Model
3
import numpy as np
4
import pandas as pd
5
import matplotlib.pyplot as plt
6
from sklearn.cluster import KMeans
7
from sklearn.cluster import k_means
8
from sklearn.metrics import silhouette_score, davies_bouldin_score
9
from sklearn.preprocessing import normalize
10
import time
11
from sklearn import metrics
12
from myUtils import *
13
from SVAEclass import VAE
14
import os
15
from keras import backend as K
16
17
18
def _load_normalized_omics(path):
    """Load one omics matrix from a text file, transpose it and max-normalize it.

    The file is read with ``np.loadtxt``, transposed (presumably the files are
    stored features x samples -- TODO confirm), and each column of the
    transposed matrix is scaled with ``normalize(axis=0, norm='max')``.
    The resulting shape is printed, as the original script did.
    """
    matrix = np.transpose(np.loadtxt(path))
    matrix = normalize(matrix, axis=0, norm='max')
    print(matrix.shape)
    return matrix


def get_EM(datapath,resultpath):
    """Train the VAE on the concatenated omics data and save its encoding.

    Reads ``log_exp_omics.txt``, ``log_mirna_omics.txt`` and ``methy_omics.txt``
    from ``datapath``, normalizes each one, concatenates them column-wise,
    trains a ``VAE`` with layer sizes ``[3000, 300, 10]`` and writes the output
    of ``vae.predict`` to ``<resultpath>/SVAE_FCTAE_EM.txt``.

    Args:
        datapath: Directory containing the three omics text files.
        resultpath: Directory that receives ``SVAE_FCTAE_EM.txt``; it is
            created if it does not exist yet.

    Returns:
        None. The encoding is written to disk.
    """
    omics_files = ('log_exp_omics.txt', 'log_mirna_omics.txt', 'methy_omics.txt')
    omics = np.concatenate(
        [_load_normalized_omics('{}/{}'.format(datapath, name))
         for name in omics_files],
        axis=1,
    )
    print(omics.shape)
    data = omics

    encoding1_dim = 3000
    encoding2_dim = 300
    middle_dim = 10
    dims = [encoding1_dim, encoding2_dim, middle_dim]

    output_file = "{}/SVAE_FCTAE_EM.txt".format(resultpath)
    try:
        vae = VAE(data, dims)
        vae.autoencoder.summary()
        vae.train()
        encoded_factors = vae.predict(data)

        # np.savetxt creates (or overwrites) the file itself, so the former
        # os.mknod call -- which is Unix-only -- is not needed. Only the
        # parent directory has to exist.
        output_dir = os.path.dirname(output_file)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        np.savetxt(output_file, encoded_factors)
    finally:
        # Release the Keras backend session even if training or saving fails,
        # so repeated calls do not accumulate state.
        K.clear_session()
49
50
51
if __name__ == '__main__':
    # Root folders: inputs live under data_path, outputs are mirrored under
    # result_path using the same sub-directory names.
    data_path = r"data/cancer"
    result_path = r"result/cancer"

    data_dir_list = []
    result_dir_list = []
    for entry in os.listdir(data_path):
        # Build the path of this entry and of its counterpart in the results tree.
        data_dir_file_path = os.path.join(data_path, entry)
        result_dir_file_path = os.path.join(result_path, entry)

        # Only sub-directories are collected; plain files are skipped.
        if not os.path.isdir(data_dir_file_path):
            continue

        data_dir_list.append(data_dir_file_path)
        if not os.path.exists(result_dir_file_path):
            os.makedirs(result_dir_file_path)
        result_dir_list.append(result_dir_file_path)

    # The collected lists are currently not iterated; to process every
    # dataset, loop over zip(data_dir_list, result_dir_list) and call
    # get_EM(datapath, resultpath) for each pair.

    # Only the gbm dataset is processed for now.
    datapath = 'data/cancer/gbm'
    resultpath = 'result/cancer/gbm'
    get_EM(datapath, resultpath)
77
78
79
80
81
82
83
84
85