|
a |
|
b/examples/example_with_dummy_data.py |
|
|
1 |
from os.path import abspath |
|
|
2 |
from os.path import split |
|
|
3 |
|
|
|
4 |
from simdeep.simdeep_boosting import SimDeepBoosting |
|
|
5 |
|
|
|
6 |
|
|
|
7 |
def test_instance():
    """
    Example of SimDeepBoosting on the dummy dataset.

    Builds a SimDeepBoosting instance from the dummy RNA / METH matrices and
    the dummy survival file located under ``examples/data/``, fits it, and
    then runs the reporting / scoring methods in sequence. Two test datasets
    (RNA only, then METH only with a metadata file) are loaded afterwards and
    labelled with the fitted models.

    Results are written to the same folder as the input data, since
    ``path_results`` is set to ``PATH_DATA``.
    """
    # Data folder, resolved relative to the directory containing this file
    PATH_DATA = '{0}/../examples/data/'.format(split(abspath(__file__))[0])

    # Input files
    TRAINING_TSV = {'RNA': 'rna_dummy.tsv', 'METH': 'meth_dummy.tsv'}
    SURVIVAL_TSV = 'survival_dummy.tsv'

    # Optional metadata file
    # (only referenced by the commented-out `metadata_tsv` argument below)
    OPTIONAL_METADATA = "metadata_dummy.tsv"

    # Example filter for subsetting the training set with the metadata:
    # here the 'stage' key restricted to the values I, II and III
    # (only referenced by the commented-out `subset_training_with_meta`
    # argument below)
    SUBSET_TRAINING_WITH_META = {'stage': ['I', 'II', 'III']}

    PROJECT_NAME = 'TestProject'
    SEED = 3
    nb_it = 5  # Number of models to be built
    nb_threads = 2  # Number of processes to be used to fit individual survival models

    ################ AUTOENCODER PARAMETERS ################
    EPOCHS = 10
    ## Additional parameters for the autoencoders can be defined, see config.py file for details
    #########################################################

    ################ ADDITIONAL PARAMETERS ##################
    # PATH_TO_SAVE_MODEL = '/home/username/deepprog'
    # PVALUE_THRESHOLD = 0.01
    # NB_SELECTED_FEATURES = 10
    # STACK_MULTI_OMIC = False
    #########################################################

    # It is possible to define a custom normalisation
    # from sklearn.preprocessing import RobustScaler
    # norm = {
    #     'CUSTOM': RobustScaler,
    # }

    boosting = SimDeepBoosting(
        nb_threads=nb_threads,
        nb_it=nb_it,
        split_n_fold=3,
        survival_tsv=SURVIVAL_TSV,
        training_tsv=TRAINING_TSV,
        # metadata_tsv=OPTIONAL_METADATA,  # optional
        path_data=PATH_DATA,
        project_name=PROJECT_NAME,
        path_results=PATH_DATA,
        use_r_packages=False,  # whether to use R functions from the survival and survcomp packages
        epochs=EPOCHS,
        seed=SEED,
        # normalization=norm,
        cluster_method='coxPH',
        metadata_usage='labels',
        use_autoencoders=True,
        feature_surv_analysis=True,
        feature_selection_usage="lasso",
        # subset_training_with_meta=SUBSET_TRAINING_WITH_META,
        # stack_multi_omic=True,
        # path_to_save_model=PATH_TO_SAVE_MODEL,
        # pvalue_threshold=PVALUE_THRESHOLD,
        # nb_selected_features=NB_SELECTED_FEATURES,
    )

    # Fit the models, then label the full training dataset
    boosting.fit()
    boosting.predict_labels_on_full_dataset()

    boosting.save_models_classes()
    boosting.save_cv_models_classes()

    boosting.compute_clusters_consistency_for_full_labels()
    # NOTE: the method name below is spelled exactly as in the original
    # example ("evalutate"); do not "correct" it here without checking the
    # SimDeepBoosting API.
    boosting.evalutate_cluster_performance()
    boosting.collect_cindex_for_test_fold()
    boosting.collect_cindex_for_full_dataset()

    boosting.compute_feature_scores_per_cluster()
    boosting.compute_survival_feature_scores_per_cluster(pval_thres=0.10)
    boosting.write_feature_score_per_cluster()
    boosting.collect_number_of_features_per_omic()
    boosting.compute_pvalue_for_merged_test_fold()

    # First test dataset: RNA only
    boosting.load_new_test_dataset(
        tsv_dict={'RNA': 'rna_dummy.tsv'},  # OMIC file of the test set. It doesn't have to be the same as for training
        path_survival_file='survival_dummy.tsv',  # Optional survival file of the test set for computing validation log-rank pvalue
        fname_key='dummy',  # Name of the test set to be used
    )

    boosting.predict_labels_on_test_dataset()
    boosting.save_test_models_classes()

    boosting.compute_c_indexes_for_test_dataset()
    boosting.compute_clusters_consistency_for_test_labels()

    # Experimental method to plot the test dataset amongst the class kernel densities
    boosting.plot_supervised_kernel_for_test_sets()
    boosting.plot_supervised_predicted_labels_for_test_sets()

    # Second test dataset: METH only, with a metadata file
    boosting.load_new_test_dataset(
        tsv_dict={'METH': 'meth_dummy.tsv'},  # OMIC file of the second test set.
        path_survival_file='survival_dummy.tsv',  # Survival file of the test set
        fname_key='dummy_METH',  # Name of the second test set
        metadata_file="metadata_dummy.tsv"  # Optional metadata
    )

    boosting.predict_labels_on_test_dataset()
    boosting.compute_c_indexes_for_test_dataset()
    boosting.compute_clusters_consistency_for_test_labels()

    # Experimental method to plot the test dataset amongst the class kernel densities
    boosting.plot_supervised_kernel_for_test_sets()
    boosting.plot_supervised_predicted_labels_for_test_sets()
|
|
119 |
|
|
|
120 |
|
|
|
121 |
# Run the example only when this file is executed as a script
if __name__ == '__main__':
    test_instance()