a b/examples/load_and_save_models.py
1
import os
2
# Pin the hash seed to a fixed value for reproducibility.
# NOTE(review): CPython reads PYTHONHASHSEED at interpreter start-up, so
# setting it here presumably only affects child processes spawned later,
# not this process -- confirm that this is the intent.
os.environ['PYTHONHASHSEED']=str(2020)
3
4
import random
5
# Seed the standard-library RNG before the simdeep modules are imported.
random.seed(2020)
6
7
from os.path import abspath
8
from os.path import split
9
10
from simdeep.simdeep_boosting import SimDeepBoosting
11
from simdeep.simdeep_utils import save_model
12
from simdeep.simdeep_utils import load_model
13
14
15
def test_instance():
    """
    Example of saving and reloading a SimDeepBoosting model.

    Builds a SimDeepBoosting instance on the dummy RNA / METH matrices
    located in ``examples/data``, fits it, saves it with ``save_model``,
    deletes the in-memory object and reloads it with ``load_model``. The
    reloaded instance is then used for the post-fit analysis calls and for
    predictions on two single-omic test datasets.
    """
    PATH_DATA = '{0}/../examples/data/'.format(split(abspath(__file__))[0])

    # Input files
    TRAINING_TSV = {'RNA': 'rna_dummy.tsv', 'METH': 'meth_dummy.tsv'}
    SURVIVAL_TSV = 'survival_dummy.tsv'

    PROJECT_NAME = 'TestProject'
    # Folder handed to both save_model and load_model; defined once so the
    # two calls cannot drift apart.
    PATH_SAVED_MODEL = "./test_saved_model"
    SEED = 3
    nb_it = 2  # Number of models to be built
    nb_threads = 2  # Number of processes to be used to fit individual survival models

    ################ AUTOENCODER PARAMETERS ################
    EPOCHS = 10
    ## Additional parameters for the autoencoders can be defined, see config.py file for details
    # LEVEL_DIMS_IN = [250]
    # LEVEL_DIMS_OUT = [250]
    # LOSS = 'binary_crossentropy'
    # OPTIMIZER = 'adam'
    # ACT_REG = 0
    # W_REG = 0
    # DROPOUT = 0.5
    # DATA_SPLIT = 0
    # ACTIVATION = 'tanh'
    #########################################################

    ################ ADDITIONAL PARAMETERS ##################
    # PATH_TO_SAVE_MODEL = '/home/username/deepprog'
    # PVALUE_THRESHOLD = 0.01
    # NB_SELECTED_FEATURES = 10
    # STACK_MULTI_OMIC = False
    #########################################################

    # Imported locally, as in the original example, so that sklearn is only
    # loaded when the example is actually run.
    from sklearn.preprocessing import RobustScaler

    # Custom normalisation: the scaler class itself (not an instance) is
    # passed under the 'CUSTOM' key.
    norm = {
        'CUSTOM': RobustScaler,
    }

    boosting = SimDeepBoosting(
        nb_threads=nb_threads,
        nb_it=nb_it,
        split_n_fold=3,
        survival_tsv=SURVIVAL_TSV,
        training_tsv=TRAINING_TSV,
        path_data=PATH_DATA,
        project_name=PROJECT_NAME,
        path_results=PATH_DATA,
        epochs=EPOCHS,
        seed=SEED,
        normalization=norm,
        cluster_method='mixture',
        use_autoencoders=True,
        feature_surv_analysis=True,
        # Trailing comma so that any optional argument below can be
        # uncommented without causing a syntax error.
        distribute=False,
        # stack_multi_omic=STACK_MULTI_OMIC,
        # level_dims_in=LEVEL_DIMS_IN,
        # level_dims_out=LEVEL_DIMS_OUT,
        # loss=LOSS,
        # optimizer=OPTIMIZER,
        # act_reg=ACT_REG,
        # w_reg=W_REG,
        # dropout=DROPOUT,
        # data_split=DATA_SPLIT,
        # activation=ACTIVATION,
        # path_to_save_model=PATH_TO_SAVE_MODEL,
        # pvalue_threshold=PVALUE_THRESHOLD,
        # nb_selected_features=NB_SELECTED_FEATURES,
    )

    # Fit, save, drop the in-memory object, then reload it under the same
    # project name so every call below runs on the reloaded instance.
    boosting.fit()
    save_model(boosting, PATH_SAVED_MODEL)

    del boosting

    boosting = load_model(PROJECT_NAME, PATH_SAVED_MODEL)

    boosting.predict_labels_on_full_dataset()

    boosting.compute_clusters_consistency_for_full_labels()
    # NOTE: 'evalutate' is the spelling of the method name called by the
    # original example; it is kept as is.
    boosting.evalutate_cluster_performance()
    boosting.collect_cindex_for_test_fold()
    boosting.collect_cindex_for_full_dataset()

    boosting.compute_feature_scores_per_cluster()
    boosting.write_feature_score_per_cluster()
    boosting.collect_number_of_features_per_omic()
    boosting.compute_pvalue_for_merged_test_fold()

    boosting.load_new_test_dataset(
        {'RNA': 'rna_dummy.tsv'},  # OMIC file of the test set. It doesn't have to be the same as for training
        'dummy',  # Name of the test set to be used
        'survival_dummy.tsv',  # Survival file of the test set (optional)
    )

    boosting.predict_labels_on_test_dataset()
    boosting.compute_c_indexes_for_test_dataset()
    boosting.compute_clusters_consistency_for_test_labels()

    boosting.load_new_test_dataset(
        {'METH': 'meth_dummy.tsv'},  # OMIC file of the second test set.
        'dummy_METH',  # Name of the second test set
        'survival_dummy.tsv',  # Survival file of the test set (optional)
    )

    boosting.predict_labels_on_test_dataset()
    boosting.compute_c_indexes_for_test_dataset()
    boosting.compute_clusters_consistency_for_test_labels()
125
126
127
if __name__ == '__main__':
    # Run the example only when this file is executed as a script.
    test_instance()