--- a +++ b/python-scripts/runToGetMOG.py @@ -0,0 +1,183 @@ +import numpy as np +from sklearn.preprocessing import normalize +from keras.layers import Input, Dense,concatenate,Dropout,average +from keras.models import Model +from keras import backend as K +from sklearn.metrics import roc_auc_score, f1_score, accuracy_score +import numpy as np +from sklearn.model_selection import StratifiedKFold +from keras.layers import Input, Dense,concatenate,Dropout,average +from keras.models import Model +import keras +from sklearn.metrics import classification_report + + + + +#cancer数据 +if __name__ == '__main__': + # files = ['breast2'] + files = ['gbm','breast2'] + for f in files: + datapath='./data/cancer_d2d/{f}'.format(f=f) + omics1 = np.loadtxt('{}/after_log_exp.txt'.format(datapath),str) + omics1 = np.delete(omics1, 0, axis=1) + #omics1 = np.transpose(omics1) + omics1 = omics1.astype(np.float) + omics1 = normalize(omics1, axis=0, norm='max') + print(omics1.shape) + + omics2 = np.loadtxt('{}/after_log_mirna.txt'.format(datapath),str) + omics2= np.delete(omics2, 0, axis=1) + #omics2 = np.transpose(omics2) + omics2 = omics2.astype(np.float) + omics2 = normalize(omics2, axis=0, norm='max') + print(omics2.shape) + + omics3 = np.loadtxt('{}/after_methy.txt'.format(datapath),str) + omics3= np.delete(omics3,0,axis=1) + #omics3 = np.transpose(omics3) + omics3 = omics3.astype(np.float) + omics3 = normalize(omics3, axis=0, norm='max') + print(omics3.shape) + + labels = np.loadtxt('{datapath}/after_labels.txt'.format(datapath=datapath), str) + labels = np.delete(labels, 0, axis=1) + labels = labels.astype(np.int) + labels = np.squeeze(labels,axis=1) + # datapath = 'data/BRCA' + # omics1 = np.loadtxt('{}/1_all.csv'.format(datapath),delimiter=',') + # #omics1 = np.transpose(omics1) + # omics1 = normalize(omics1, axis=0, norm='max') + + # omics2 = np.loadtxt('{}/2_all.csv'.format(datapath),delimiter=',') + # #omics2 = np.transpose(omics2) + # omics2 = normalize(omics2, axis=0, norm='max') + + # omics3 = np.loadtxt('{}/3_all.csv'.format(datapath),delimiter=',') + # #omics3 = np.transpose(omics3) + # omics3 = normalize(omics3, axis=0, norm='max') + + # k折交叉验证 + all_acc = [] + all_f1_macro = [] + all_f1_weighted = [] + all_auc_macro = [] + all_auc_weighted = [] + #omics = np.loadtxt('./result/nmf/mf_em.txt') + omics = np.concatenate((omics1, omics2, omics3), axis=1) + + # labels = np.loadtxt('./data/BRCA/labels_all.csv', delimiter=',') + # data=np.concatenate([]) + kfold = StratifiedKFold(n_splits=4, shuffle=True, random_state=1) + for train_ix, test_ix in kfold.split(omics1, labels): + omics_tobuild=[omics1,omics2,omics3] + train_X_1=omics1[train_ix] + train_X_2=omics2[train_ix] + train_X_3=omics3[train_ix] + + test_X_1=omics1[test_ix] + test_X_2=omics2[test_ix] + test_X_3=omics3[test_ix] + + train_y, test_y = labels[train_ix], labels[test_ix] + + np.savetxt('{}/1_tr.csv'.format(datapath), train_X_1, delimiter=',') + np.savetxt('{}/2_tr.csv'.format(datapath), train_X_2, delimiter=',') + np.savetxt('{}/3_tr.csv'.format(datapath), train_X_3, delimiter=',') + np.savetxt('{}/1_te.csv'.format(datapath), test_X_1, delimiter=',') + np.savetxt('{}/2_te.csv'.format(datapath), test_X_2, delimiter=',') + np.savetxt('{}/3_te.csv'.format(datapath), test_X_3, delimiter=',') + np.savetxt('{}/labels_tr.csv'.format(datapath), train_y, delimiter=',') + np.savetxt('{}/labels_te.csv'.format(datapath), test_y, delimiter=',') + break + + +#simulations数据 +# if __name__ == '__main__': +# datatypes=["equal","heterogeneous"] +# typenums=[5,10,15] +# for datatype in datatypes: +# for typenum in typenums: +# datapath='data/simulations/{}/{}'.format(datatype, typenum) + +# labels = np.loadtxt('{}/c.txt'.format(datapath)) + + +# omics1 = np.loadtxt('{}/o1.txt'.format(datapath)) +# omics1 = np.transpose(omics1) +# omics1 = normalize(omics1, axis=0, norm='max') + +# omics2 = np.loadtxt('{}/o2.txt'.format(datapath)) +# omics2 = np.transpose(omics2) +# omics2 = normalize(omics2, axis=0, norm='max') + +# omics3 = np.loadtxt('{}/o3.txt'.format(datapath)) +# omics3 = np.transpose(omics3) +# omics3 = normalize(omics3, axis=0, norm='max') + +# omics = np.concatenate((omics1, omics2, omics3), axis=1) + +# kfold = StratifiedKFold(n_splits=4, shuffle=True, random_state=1) +# for train_ix, test_ix in kfold.split(omics1, labels): +# omics_tobuild=[omics1,omics2,omics3] +# train_X_1=omics1[train_ix] +# train_X_2=omics2[train_ix] +# train_X_3=omics3[train_ix] + +# test_X_1=omics1[test_ix] +# test_X_2=omics2[test_ix] +# test_X_3=omics3[test_ix] + +# train_y, test_y = labels[train_ix], labels[test_ix] + +# np.savetxt('{}/1_tr.csv'.format(datapath), train_X_1, delimiter=',') +# np.savetxt('{}/2_tr.csv'.format(datapath), train_X_2, delimiter=',') +# np.savetxt('{}/3_tr.csv'.format(datapath), train_X_3, delimiter=',') +# np.savetxt('{}/1_te.csv'.format(datapath), test_X_1, delimiter=',') +# np.savetxt('{}/2_te.csv'.format(datapath), test_X_2, delimiter=',') +# np.savetxt('{}/3_te.csv'.format(datapath), test_X_3, delimiter=',') +# np.savetxt('{}/labels_tr.csv'.format(datapath), train_y, delimiter=',') +# np.savetxt('{}/labels_te.csv'.format(datapath), test_y, delimiter=',') +# break + +#single数据 +if __name__ == '__main__': + + datapath = 'data/single-cell/' + resultpath = 'result/single-cell/' + labels = np.loadtxt('{}/c.txt'.format(datapath)) + # groundtruth = list(np.int_(groundtruth)) + + omics = np.loadtxt('{}/omics.txt'.format(datapath)) + omics = np.transpose(omics) + omics1=omics[0:206] + omics2=omics[206:412] + omics1 = normalize(omics1, axis=0, norm='max') + omics2 = normalize(omics2, axis=0, norm='max') + omics = np.concatenate((omics1, omics2), axis=1) + + + kfold = StratifiedKFold(n_splits=4, shuffle=True, random_state=1) + for train_ix, test_ix in kfold.split(omics1, labels): + omics_tobuild=[omics1,omics2] + train_X_1=omics1[train_ix] + train_X_2=omics2[train_ix] + + + test_X_1=omics1[test_ix] + test_X_2=omics2[test_ix] + + + train_y, test_y = labels[train_ix], labels[test_ix] + + np.savetxt('{}/1_tr.csv'.format(datapath), train_X_1, delimiter=',') + np.savetxt('{}/2_tr.csv'.format(datapath), train_X_2, delimiter=',') + np.savetxt('{}/1_te.csv'.format(datapath), test_X_1, delimiter=',') + np.savetxt('{}/2_te.csv'.format(datapath), test_X_2, delimiter=',') + np.savetxt('{}/labels_tr.csv'.format(datapath), train_y, delimiter=',') + np.savetxt('{}/labels_te.csv'.format(datapath), test_y, delimiter=',') + break + + +