# python-scripts/runToGetMOG.py
1
import numpy as np
2
from sklearn.preprocessing import normalize
3
from keras.layers import Input, Dense,concatenate,Dropout,average
4
from keras.models import Model
5
from keras import backend as K
6
from sklearn.metrics import roc_auc_score, f1_score, accuracy_score
7
import numpy as np
8
from sklearn.model_selection import StratifiedKFold
9
from keras.layers import Input, Dense,concatenate,Dropout,average
10
from keras.models import Model
11
import keras
12
from sklearn.metrics import classification_report
13
14
    
15
    
16
17
# cancer data
18
def load_table(path, dtype=float):
    """Load a whitespace-delimited text table and drop its first column.

    The file is read as strings, column 0 is removed, and the remaining
    cells are cast to ``dtype``.  (Column 0 is presumably a sample/row
    identifier -- confirm against the data files.)

    Builtin ``float``/``int`` are used instead of ``np.float``/``np.int``:
    those aliases were deprecated in NumPy 1.20 and removed in 1.24, where
    accessing them raises ``AttributeError``.

    Args:
        path: Path of the text file, as accepted by ``np.loadtxt``.
        dtype: Target dtype of the returned array.

    Returns:
        2-D array holding every column but the first, cast to ``dtype``.
    """
    raw = np.loadtxt(path, dtype=str)
    return np.delete(raw, 0, axis=1).astype(dtype)


if __name__ == '__main__':
    # For each cancer dataset: load three omics tables plus labels, take the
    # first fold of a stratified 4-fold split, and write the train/test
    # parts as CSV files next to the inputs.
    # NOTE: an earlier variant read data/BRCA/{1,2,3}_all.csv
    # (comma-delimited) instead of the per-cancer text files below.
    # files = ['breast2']
    files = ['gbm', 'breast2']
    for f in files:
        datapath = './data/cancer_d2d/{f}'.format(f=f)

        # Each table is scaled column-wise (axis=0) by its max norm.
        omics1 = normalize(
            load_table('{}/after_log_exp.txt'.format(datapath)),
            axis=0, norm='max')
        print(omics1.shape)

        omics2 = normalize(
            load_table('{}/after_log_mirna.txt'.format(datapath)),
            axis=0, norm='max')
        print(omics2.shape)

        omics3 = normalize(
            load_table('{}/after_methy.txt'.format(datapath)),
            axis=0, norm='max')
        print(omics3.shape)

        # squeeze(axis=1) requires exactly one label column after the first
        # column has been dropped; it raises ValueError otherwise.
        labels = np.squeeze(
            load_table(
                '{datapath}/after_labels.txt'.format(datapath=datapath),
                dtype=int),
            axis=1)

        # All tables are indexed with the same row indices below, so they
        # must agree on the number of rows.  (Previously this was only
        # enforced implicitly by an unused np.concatenate call.)
        sample_counts = [len(omics1), len(omics2), len(omics3), len(labels)]
        if len(set(sample_counts)) != 1:
            raise ValueError(
                'inconsistent sample counts in {}: {}'.format(
                    datapath, sample_counts))

        # k-fold cross-validation: only the first of the 4 stratified folds
        # is exported.
        kfold = StratifiedKFold(n_splits=4, shuffle=True, random_state=1)
        train_ix, test_ix = next(kfold.split(omics1, labels))

        outputs = (
            ('{}/1_tr.csv', omics1[train_ix]),
            ('{}/2_tr.csv', omics2[train_ix]),
            ('{}/3_tr.csv', omics3[train_ix]),
            ('{}/1_te.csv', omics1[test_ix]),
            ('{}/2_te.csv', omics2[test_ix]),
            ('{}/3_te.csv', omics3[test_ix]),
            ('{}/labels_tr.csv', labels[train_ix]),
            ('{}/labels_te.csv', labels[test_ix]),
        )
        for template, array in outputs:
            np.savetxt(template.format(datapath), array, delimiter=',')
94
95
96
# simulations data
97
# if __name__ == '__main__':
98
#     datatypes=["equal","heterogeneous"]
99
#     typenums=[5,10,15]
100
#     for datatype in datatypes:
101
#         for typenum in typenums:
102
#             datapath='data/simulations/{}/{}'.format(datatype, typenum)
103
    
104
#             labels = np.loadtxt('{}/c.txt'.format(datapath))
105
                
106
107
#             omics1 = np.loadtxt('{}/o1.txt'.format(datapath))
108
#             omics1 = np.transpose(omics1)
109
#             omics1 = normalize(omics1, axis=0, norm='max')
110
111
#             omics2 = np.loadtxt('{}/o2.txt'.format(datapath))
112
#             omics2 = np.transpose(omics2)
113
#             omics2 = normalize(omics2, axis=0, norm='max')
114
115
#             omics3 = np.loadtxt('{}/o3.txt'.format(datapath))
116
#             omics3 = np.transpose(omics3)
117
#             omics3 = normalize(omics3, axis=0, norm='max')
118
119
#             omics = np.concatenate((omics1, omics2, omics3), axis=1)
120
121
#             kfold = StratifiedKFold(n_splits=4, shuffle=True, random_state=1)
122
#             for train_ix, test_ix in kfold.split(omics1, labels):
123
#                 omics_tobuild=[omics1,omics2,omics3]
124
#                 train_X_1=omics1[train_ix]
125
#                 train_X_2=omics2[train_ix]
126
#                 train_X_3=omics3[train_ix]
127
128
#                 test_X_1=omics1[test_ix]
129
#                 test_X_2=omics2[test_ix]
130
#                 test_X_3=omics3[test_ix]
131
                
132
#                 train_y, test_y = labels[train_ix], labels[test_ix]
133
                
134
#                 np.savetxt('{}/1_tr.csv'.format(datapath), train_X_1, delimiter=',')
135
#                 np.savetxt('{}/2_tr.csv'.format(datapath), train_X_2, delimiter=',')
136
#                 np.savetxt('{}/3_tr.csv'.format(datapath), train_X_3, delimiter=',')
137
#                 np.savetxt('{}/1_te.csv'.format(datapath), test_X_1, delimiter=',')
138
#                 np.savetxt('{}/2_te.csv'.format(datapath), test_X_2, delimiter=',')
139
#                 np.savetxt('{}/3_te.csv'.format(datapath), test_X_3, delimiter=',')
140
#                 np.savetxt('{}/labels_tr.csv'.format(datapath), train_y, delimiter=',')
141
#                 np.savetxt('{}/labels_te.csv'.format(datapath), test_y, delimiter=',')
142
#                 break
143
144
# single-cell data
145
if __name__ == '__main__':
    # Single-cell dataset: split one stacked matrix into two views, scale
    # each view, and write the first fold of a stratified 4-fold split as
    # CSV files into the data directory.
    datapath = 'data/single-cell/'
    resultpath = 'result/single-cell/'

    labels = np.loadtxt('{}/c.txt'.format(datapath))

    # After transposing, rows 0-205 form the first view and rows 206-411
    # the second (presumably 206 cells measured in two modalities --
    # confirm against omics.txt).
    stacked = np.loadtxt('{}/omics.txt'.format(datapath)).T
    omics1, omics2 = (
        normalize(view, axis=0, norm='max')  # column-wise max-norm scaling
        for view in (stacked[0:206], stacked[206:412])
    )
    omics = np.concatenate((omics1, omics2), axis=1)

    # Only the first of the four stratified folds is used.
    kfold = StratifiedKFold(n_splits=4, shuffle=True, random_state=1)
    train_ix, test_ix = next(kfold.split(omics1, labels))
    omics_tobuild = [omics1, omics2]

    # (path template, rows to save), written in this order.
    exports = (
        ('{}/1_tr.csv', omics1[train_ix]),
        ('{}/2_tr.csv', omics2[train_ix]),
        ('{}/1_te.csv', omics1[test_ix]),
        ('{}/2_te.csv', omics2[test_ix]),
        ('{}/labels_tr.csv', labels[train_ix]),
        ('{}/labels_te.csv', labels[test_ix]),
    )
    for template, rows in exports:
        np.savetxt(template.format(datapath), rows, delimiter=',')
181
182
183