[53737a]: / examples / load_and_save_models.py

Download this file

129 lines (105 with data), 4.0 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import os
os.environ['PYTHONHASHSEED']=str(2020)
import random
random.seed(2020)
from os.path import abspath
from os.path import split
from simdeep.simdeep_boosting import SimDeepBoosting
from simdeep.simdeep_utils import save_model
from simdeep.simdeep_utils import load_model
def test_instance():
"""
example of SimDeepBoosting
"""
PATH_DATA = '{0}/../examples/data/'.format(split(abspath(__file__))[0])
#Input file
TRAINING_TSV = {'RNA': 'rna_dummy.tsv', 'METH': 'meth_dummy.tsv'}
SURVIVAL_TSV = 'survival_dummy.tsv'
PROJECT_NAME = 'TestProject'
SEED = 3
nb_it = 2 # Number of models to be built
nb_threads = 2 # Number of processes to be used to fit individual survival models
################ AUTOENCODER PARAMETERS ################
EPOCHS = 10
## Additional parameters for the autoencoders can be defined, see config.py file for details
# LEVEL_DIMS_IN = [250]
# LEVEL_DIMS_OUT = [250]
# LOSS = 'binary_crossentropy'
# OPTIMIZER = 'adam'
# ACT_REG = 0
# W_REG = 0
# DROPOUT = 0.5
# DATA_SPLIT = 0
# ACTIVATION = 'tanh'
#########################################################
################ ADDITIONAL PARAMETERS ##################
# PATH_TO_SAVE_MODEL = '/home/username/deepprog'
# PVALUE_THRESHOLD = 0.01
# NB_SELECTED_FEATURES = 10
# STACK_MULTI_OMIC = False
#########################################################
from sklearn.preprocessing import RobustScaler
norm = {
'CUSTOM': RobustScaler,
}
boosting = SimDeepBoosting(
nb_threads=nb_threads,
nb_it=nb_it,
split_n_fold=3,
survival_tsv=SURVIVAL_TSV,
training_tsv=TRAINING_TSV,
path_data=PATH_DATA,
project_name=PROJECT_NAME,
path_results=PATH_DATA,
epochs=EPOCHS,
seed=SEED,
normalization=norm,
cluster_method='mixture',
use_autoencoders=True,
feature_surv_analysis=True,
distribute=False
# stack_multi_omic=STACK_MULTI_OMIC,
# level_dims_in=LEVEL_DIMS_IN,
# level_dims_out=LEVEL_DIMS_OUT,
# loss=LOSS,
# optimizer=OPTIMIZER,
# act_reg=ACT_REG,
# w_reg=W_REG,
# dropout=DROPOUT,
# data_split=DATA_SPLIT,
# activation=ACTIVATION,
# path_to_save_model=PATH_TO_SAVE_MODEL,
# pvalue_threshold=PVALUE_THRESHOLD,
# nb_selected_features=NB_SELECTED_FEATURES,
)
boosting.fit()
save_model(boosting, "./test_saved_model")
del boosting
boosting = load_model("TestProject", "./test_saved_model")
boosting.predict_labels_on_full_dataset()
boosting.compute_clusters_consistency_for_full_labels()
boosting.evalutate_cluster_performance()
boosting.collect_cindex_for_test_fold()
boosting.collect_cindex_for_full_dataset()
boosting.compute_feature_scores_per_cluster()
boosting.write_feature_score_per_cluster()
boosting.collect_number_of_features_per_omic()
boosting.compute_pvalue_for_merged_test_fold()
boosting.load_new_test_dataset(
{'RNA': 'rna_dummy.tsv'}, # OMIC file of the test set. It doesnt have to be the same as for training
'dummy', # Name of the test test to be used
'survival_dummy.tsv', # Survival file of the test set Optional
)
boosting.predict_labels_on_test_dataset()
boosting.compute_c_indexes_for_test_dataset()
boosting.compute_clusters_consistency_for_test_labels()
boosting.load_new_test_dataset(
{'METH': 'meth_dummy.tsv'}, # OMIC file of the second test set.
'dummy_METH', # Name of the second test test
'survival_dummy.tsv', # Survival file of the test set Optional
)
boosting.predict_labels_on_test_dataset()
boosting.compute_c_indexes_for_test_dataset()
boosting.compute_clusters_consistency_for_test_labels()
if __name__ == '__main__':
test_instance()