# examples/example_with_dummy_data.py
1
from os.path import abspath
2
from os.path import split
3
4
from simdeep.simdeep_boosting import SimDeepBoosting
5
6
7
def test_instance():
    """
    Run a full SimDeepBoosting workflow on the bundled dummy dataset.

    The function builds a ``SimDeepBoosting`` instance from the dummy RNA,
    METH and survival files found in the ``examples/data/`` folder (resolved
    relative to this file), fits it, and then calls in sequence the methods
    that predict and save labels, compute the evaluation metrics and feature
    scores, and finally load and process two test datasets (RNA only, then
    METH only with a metadata file).

    It takes no argument and returns nothing.
    """
    # Folder containing this script; the data folder is resolved from it
    script_dir = split(abspath(__file__))[0]
    path_data = '{0}/../examples/data/'.format(script_dir)

    # Input files
    training_tsv = {'RNA': 'rna_dummy.tsv', 'METH': 'meth_dummy.tsv'}
    survival_tsv = 'survival_dummy.tsv'

    # Optional metadata file (only used by the commented-out option below)
    optional_metadata = "metadata_dummy.tsv"

    # Optional subsetting of the training set from the metadata, presumably
    # keeping only the samples whose 'stage' is I, II or III
    # (only used by the commented-out option below)
    subset_training_with_meta = {'stage': ['I', 'II', 'III']}

    project_name = 'TestProject'
    seed = 3
    nb_models = 5   # Number of models to be built
    nb_workers = 2  # Number of processes to be used to fit individual survival models

    ################ AUTOENCODER PARAMETERS ################
    epochs = 10
    ## Additional parameters for the autoencoders can be defined, see config.py file for details
    #########################################################

    ################ ADDITIONAL PARAMETERS ##################
    # path_to_save_model = '/home/username/deepprog'
    # pvalue_threshold = 0.01
    # nb_selected_features = 10
    # stack_multi_omic = False
    #########################################################

    # It is possible to define a custom normalisation
    # from sklearn.preprocessing import RobustScaler
    # norm = {
    #         'CUSTOM': RobustScaler,
    # }

    # All the constructor options are gathered first, optional ones are
    # left commented out
    boosting_options = dict(
        nb_threads=nb_workers,
        nb_it=nb_models,
        split_n_fold=3,
        survival_tsv=survival_tsv,
        training_tsv=training_tsv,
        # metadata_tsv=optional_metadata, # optional
        path_data=path_data,
        project_name=project_name,
        path_results=path_data,
        use_r_packages=False,  # to use R functions from the survival and survcomp packages
        epochs=epochs,
        seed=seed,
        # normalization=norm,
        cluster_method='coxPH',
        metadata_usage='labels',
        use_autoencoders=True,
        feature_surv_analysis=True,
        feature_selection_usage="lasso",
        # subset_training_with_meta=subset_training_with_meta,
        # stack_multi_omic=True,
        # path_to_save_model=path_to_save_model,
        # pvalue_threshold=pvalue_threshold,
        # nb_selected_features=nb_selected_features,
    )
    model = SimDeepBoosting(**boosting_options)

    # Training and label inference on the full training dataset
    model.fit()
    model.predict_labels_on_full_dataset()

    model.save_models_classes()
    model.save_cv_models_classes()

    # Evaluation metrics
    model.compute_clusters_consistency_for_full_labels()
    model.evalutate_cluster_performance()
    model.collect_cindex_for_test_fold()
    model.collect_cindex_for_full_dataset()

    # Feature scores
    model.compute_feature_scores_per_cluster()
    model.compute_survival_feature_scores_per_cluster(pval_thres=0.10)
    model.write_feature_score_per_cluster()
    model.collect_number_of_features_per_omic()
    model.compute_pvalue_for_merged_test_fold()

    # First test dataset
    first_test_set = dict(
        # OMIC file of the test set. It doesn't have to be the same as for training
        tsv_dict={'RNA': 'rna_dummy.tsv'},
        # Optional survival file of the test set for computing validation log-rank pvalue
        path_survival_file='survival_dummy.tsv',
        # Name of the test set to be used
        fname_key='dummy',
    )
    model.load_new_test_dataset(**first_test_set)

    model.predict_labels_on_test_dataset()
    model.save_test_models_classes()

    model.compute_c_indexes_for_test_dataset()
    model.compute_clusters_consistency_for_test_labels()

    # Experimental method to plot the test dataset amongst the class kernel densities
    model.plot_supervised_kernel_for_test_sets()
    model.plot_supervised_predicted_labels_for_test_sets()

    # Second test dataset
    second_test_set = dict(
        # OMIC file of the second test set.
        tsv_dict={'METH': 'meth_dummy.tsv'},
        # Survival file of the test set
        path_survival_file='survival_dummy.tsv',
        # Name of the second test set
        fname_key='dummy_METH',
        # Optional metadata
        metadata_file="metadata_dummy.tsv",
    )
    model.load_new_test_dataset(**second_test_set)

    model.predict_labels_on_test_dataset()
    model.compute_c_indexes_for_test_dataset()
    model.compute_clusters_consistency_for_test_labels()

    # Experimental method to plot the test dataset amongst the class kernel densities
    model.plot_supervised_kernel_for_test_sets()
    model.plot_supervised_predicted_labels_for_test_sets()
119
120
121
# Run the example only when this file is executed as a script
if __name__ == '__main__':
    test_instance()