|
a |
|
b/examples/load_and_save_models.py |
|
|
1 |
import os
# Export a fixed hash seed through the environment. CPython reads
# PYTHONHASHSEED only at interpreter start-up, so this assignment does not
# change string hashing in the already-running process; it is only seen by
# processes that inherit this environment afterwards.
# NOTE(review): presumably intended for worker processes spawned later - confirm.
os.environ['PYTHONHASHSEED']=str(2020)

import random
# Seed the standard-library RNG so the example is repeatable from run to run.
random.seed(2020)
|
|
6 |
|
|
|
7 |
from os.path import abspath |
|
|
8 |
from os.path import split |
|
|
9 |
|
|
|
10 |
from simdeep.simdeep_boosting import SimDeepBoosting |
|
|
11 |
from simdeep.simdeep_utils import save_model |
|
|
12 |
from simdeep.simdeep_utils import load_model |
|
|
13 |
|
|
|
14 |
|
|
|
15 |
def test_instance():
    """
    Example of saving and reloading a SimDeepBoosting model.

    The function fits a SimDeepBoosting instance on the dummy RNA / METH
    dataset shipped in ``examples/data``, saves it with ``save_model``,
    deletes the in-memory instance and reloads it with ``load_model``.
    Every analysis step that follows (label prediction, cluster
    consistency, c-indexes, feature scores and the two additional test
    datasets) is executed on the reloaded instance.

    The function takes no argument and returns nothing.
    """
    # Data folder resolved relative to the location of this file
    PATH_DATA = '{0}/../examples/data/'.format(split(abspath(__file__))[0])

    # Input files
    TRAINING_TSV = {'RNA': 'rna_dummy.tsv', 'METH': 'meth_dummy.tsv'}
    SURVIVAL_TSV = 'survival_dummy.tsv'

    PROJECT_NAME = 'TestProject'
    # Folder handed to both save_model and load_model; defined once so the
    # two calls cannot drift apart
    SAVED_MODEL_DIR = "./test_saved_model"
    SEED = 3
    nb_it = 2  # Number of models to be built
    nb_threads = 2  # Number of processes to be used to fit individual survival models

    ################ AUTOENCODER PARAMETERS ################
    EPOCHS = 10
    ## Additional parameters for the autoencoders can be defined, see config.py file for details
    # LEVEL_DIMS_IN = [250]
    # LEVEL_DIMS_OUT = [250]
    # LOSS = 'binary_crossentropy'
    # OPTIMIZER = 'adam'
    # ACT_REG = 0
    # W_REG = 0
    # DROPOUT = 0.5
    # DATA_SPLIT = 0
    # ACTIVATION = 'tanh'
    #########################################################

    ################ ADDITIONAL PARAMETERS ##################
    # PATH_TO_SAVE_MODEL = '/home/username/deepprog'
    # PVALUE_THRESHOLD = 0.01
    # NB_SELECTED_FEATURES = 10
    # STACK_MULTI_OMIC = False
    #########################################################

    # Imported locally, as in the original example; the scaler class itself
    # (not an instance) is passed as the custom normalization
    from sklearn.preprocessing import RobustScaler
    norm = {
        'CUSTOM': RobustScaler,
    }

    boosting = SimDeepBoosting(
        nb_threads=nb_threads,
        nb_it=nb_it,
        split_n_fold=3,
        survival_tsv=SURVIVAL_TSV,
        training_tsv=TRAINING_TSV,
        path_data=PATH_DATA,
        project_name=PROJECT_NAME,
        path_results=PATH_DATA,
        epochs=EPOCHS,
        seed=SEED,
        normalization=norm,
        cluster_method='mixture',
        use_autoencoders=True,
        feature_surv_analysis=True,
        distribute=False,
        # stack_multi_omic=STACK_MULTI_OMIC,
        # level_dims_in=LEVEL_DIMS_IN,
        # level_dims_out=LEVEL_DIMS_OUT,
        # loss=LOSS,
        # optimizer=OPTIMIZER,
        # act_reg=ACT_REG,
        # w_reg=W_REG,
        # dropout=DROPOUT,
        # data_split=DATA_SPLIT,
        # activation=ACTIVATION,
        # path_to_save_model=PATH_TO_SAVE_MODEL,
        # pvalue_threshold=PVALUE_THRESHOLD,
        # nb_selected_features=NB_SELECTED_FEATURES,
    )

    boosting.fit()
    save_model(boosting, SAVED_MODEL_DIR)

    # Drop the fitted instance so that everything below runs on the
    # reloaded model only
    del boosting

    # Reload with the same project name and folder that were used for saving
    boosting = load_model(PROJECT_NAME, SAVED_MODEL_DIR)

    boosting.predict_labels_on_full_dataset()

    boosting.compute_clusters_consistency_for_full_labels()
    boosting.evalutate_cluster_performance()
    boosting.collect_cindex_for_test_fold()
    boosting.collect_cindex_for_full_dataset()

    boosting.compute_feature_scores_per_cluster()
    boosting.write_feature_score_per_cluster()
    boosting.collect_number_of_features_per_omic()
    boosting.compute_pvalue_for_merged_test_fold()

    boosting.load_new_test_dataset(
        {'RNA': 'rna_dummy.tsv'},  # OMIC file of the test set. It doesn't have to be the same as for training
        'dummy',  # Name of the test set to be used
        'survival_dummy.tsv',  # Survival file of the test set (optional)
    )

    boosting.predict_labels_on_test_dataset()
    boosting.compute_c_indexes_for_test_dataset()
    boosting.compute_clusters_consistency_for_test_labels()

    boosting.load_new_test_dataset(
        {'METH': 'meth_dummy.tsv'},  # OMIC file of the second test set.
        'dummy_METH',  # Name of the second test set
        'survival_dummy.tsv',  # Survival file of the test set (optional)
    )

    boosting.predict_labels_on_test_dataset()
    boosting.compute_c_indexes_for_test_dataset()
    boosting.compute_clusters_consistency_for_test_labels()
|
|
125 |
|
|
|
126 |
|
|
|
127 |
# Run the example only when this file is executed as a script, not on import.
if __name__ == '__main__':
    test_instance()