Switch to unified view

a b/python-scripts/runCancerAE2.py
1
from keras.layers import Input, Dense
2
from keras.models import Model
3
import numpy as np
4
import pandas as pd
5
import matplotlib.pyplot as plt
6
from sklearn.cluster import KMeans
7
from sklearn.cluster import k_means
8
from sklearn.metrics import silhouette_score, davies_bouldin_score
9
from sklearn.preprocessing import normalize
10
import time
11
from sklearn import metrics
12
from myUtils import *
13
from AEclass import AE
14
import os
15
from keras import backend as K
16
17
18
# Input file name and autoencoder layer sizes for each omics type, in the
# order they are loaded, encoded and concatenated. The three sizes are the
# values the original script called encoding1_dim, encoding2_dim and
# middle_dim; how AE interprets them is defined in AEclass (not visible here).
OMICS_CONFIG = (
    ('log_exp_omics.txt', (1000, 100, 4)),
    ('log_mirna_omics.txt', (500, 50, 2)),
    ('methy_omics.txt', (1000, 100, 4)),
)

# Name of the file written into every result directory.
ENCODED_FILE_NAME = "AE_FAETC_EM.txt"


def _collect_dataset_dirs(data_path, result_path):
    """Pair each sub-directory of ``data_path`` with its result directory.

    Plain files directly under ``data_path`` are ignored. For every
    sub-directory, a directory of the same name is created under
    ``result_path`` if it does not exist yet.

    Args:
        data_path: Directory whose sub-directories each hold one dataset.
        result_path: Directory under which result directories are created.

    Returns:
        A list of ``(data_dir, result_dir)`` path tuples, in the order
        returned by ``os.listdir``.
    """
    pairs = []
    for entry in os.listdir(data_path):
        data_dir = os.path.join(data_path, entry)
        # Only directories are datasets; skip stray files.
        if not os.path.isdir(data_dir):
            continue
        result_dir = os.path.join(result_path, entry)
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(result_dir, exist_ok=True)
        pairs.append((data_dir, result_dir))
    return pairs


def _load_omics(path):
    """Load one omics matrix from a text file, transpose and normalize it.

    The matrix is read with ``np.loadtxt``, transposed, and then normalized
    with ``normalize(..., axis=0, norm='max')``. Its shape is printed.

    Args:
        path: Path of the whitespace-delimited text file to read.

    Returns:
        The transposed, normalized 2-D array.
    """
    matrix = np.loadtxt(path)
    # NOTE(review): presumably the files are features x samples, so the
    # transpose yields samples x features - confirm against the data files.
    matrix = np.transpose(matrix)
    matrix = normalize(matrix, axis=0, norm='max')
    print(matrix.shape)
    return matrix


def _encode(matrix, dims):
    """Train an ``AE`` on ``matrix`` and return its prediction for it.

    Args:
        matrix: Input array passed both to the ``AE`` constructor and to
            ``AE.predict``.
        dims: Layer sizes forwarded to the ``AE`` constructor.

    Returns:
        Whatever ``AE.predict(matrix)`` returns (presumably the encoded
        representation - see AEclass).
    """
    # A fresh list per model, in case AE keeps or mutates the sequence.
    ae = AE(matrix, list(dims))
    ae.train()
    ae.autoencoder.summary()
    return ae.predict(matrix)


def main(data_path=r"data/cancer4", result_path=r"result/cancer4"):
    """Encode every dataset under ``data_path`` with per-omics autoencoders.

    For each dataset directory, the files listed in ``OMICS_CONFIG`` are
    loaded, one ``AE`` is trained per omics type, and the three predictions
    are concatenated column-wise and saved as ``AE_FAETC_EM.txt`` in the
    matching result directory.

    Args:
        data_path: Directory containing one sub-directory per dataset.
        result_path: Directory that receives one result sub-directory per
            dataset.
    """
    for datapath, resultpath in _collect_dataset_dirs(data_path, result_path):
        # Load all omics first so a missing or malformed input file fails
        # before any training time is spent on this dataset.
        matrices = [
            _load_omics(os.path.join(datapath, file_name))
            for file_name, _ in OMICS_CONFIG
        ]
        # Column-wise concatenation requires equal row counts, so this also
        # checks that all omics matrices have the same number of rows.
        print(np.concatenate(matrices, axis=1).shape)

        encoded = [
            _encode(matrix, dims)
            for matrix, (_, dims) in zip(matrices, OMICS_CONFIG)
        ]
        encoded_factors = np.concatenate(encoded, axis=1)

        # np.savetxt creates the file itself; no os.mknod pre-creation is
        # needed (os.mknod is Unix-only and may need elevated privileges).
        np.savetxt(os.path.join(resultpath, ENCODED_FILE_NAME), encoded_factors)

        # Drop the Keras graph/session state before the next dataset.
        K.clear_session()


if __name__ == '__main__':
    main()
90
91
92
93
94
95
96