[53737a]: / examples / example_with_dummy_data.py

Download this file

123 lines (98 with data), 4.5 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
from os.path import abspath
from os.path import split
from simdeep.simdeep_boosting import SimDeepBoosting
def test_instance():
    """
    Run a complete SimDeepBoosting example on the dummy dataset.

    The function builds a SimDeepBoosting instance from the dummy RNA and
    METH matrices located in ``examples/data``, fits it, runs the post-fit
    evaluation and reporting calls, and finally loads two test datasets
    (RNA only, then METH only) to predict and evaluate labels on each.
    Results are written into the same folder as the input data.
    """
    # Folder holding the dummy input files, resolved relative to this script
    script_dir, _ = split(abspath(__file__))
    data_dir = script_dir + '/../examples/data/'

    # Input matrices (one per omic type) and the matching survival file
    training_files = {'RNA': 'rna_dummy.tsv', 'METH': 'meth_dummy.tsv'}
    survival_file = 'survival_dummy.tsv'

    # Optional metadata file; only referenced by the commented-out
    # `metadata_tsv` argument below
    metadata_file = "metadata_dummy.tsv"
    # Optional restriction of the training set to samples whose 'stage'
    # metadata is I, II or III; only referenced by the commented-out
    # `subset_training_with_meta` argument below
    stage_subset = {'stage': ['I', 'II', 'III']}

    project = 'TestProject'
    seed = 3
    n_models = 5   # Number of models to be built
    n_workers = 2  # Number of processes used to fit individual survival models

    # ---------------- Autoencoder parameters ----------------
    # Additional autoencoder parameters can be defined; see config.py
    n_epochs = 10

    # ---------------- Additional parameters -----------------
    # save_dir = '/home/username/deepprog'
    # pvalue_threshold = 0.01
    # n_selected_features = 10
    # stack_multi_omic = False

    # A custom normalisation can also be supplied:
    # from sklearn.preprocessing import RobustScaler
    # norm = {
    #     'CUSTOM': RobustScaler,
    # }

    settings = {
        'nb_threads': n_workers,
        'nb_it': n_models,
        'split_n_fold': 3,
        'survival_tsv': survival_file,
        'training_tsv': training_files,
        # 'metadata_tsv': metadata_file,  # optional
        'path_data': data_dir,
        'project_name': project,
        'path_results': data_dir,
        # Set to True to use R functions from the survival and survcomp packages
        'use_r_packages': False,
        'epochs': n_epochs,
        'seed': seed,
        # 'normalization': norm,
        'cluster_method': 'coxPH',
        'metadata_usage': 'labels',
        'use_autoencoders': True,
        'feature_surv_analysis': True,
        'feature_selection_usage': "lasso",
        # 'subset_training_with_meta': stage_subset,
        # 'stack_multi_omic': True,
        # 'path_to_save_model': save_dir,
        # 'pvalue_threshold': pvalue_threshold,
        # 'nb_selected_features': n_selected_features,
    }
    model = SimDeepBoosting(**settings)

    # Fit the models, then label the full dataset and persist the results
    model.fit()
    model.predict_labels_on_full_dataset()
    model.save_models_classes()
    model.save_cv_models_classes()

    # Evaluation and reporting on the training data
    model.compute_clusters_consistency_for_full_labels()
    model.evalutate_cluster_performance()
    model.collect_cindex_for_test_fold()
    model.collect_cindex_for_full_dataset()

    # Feature-level reporting
    model.compute_feature_scores_per_cluster()
    model.compute_survival_feature_scores_per_cluster(pval_thres=0.10)
    model.write_feature_score_per_cluster()
    model.collect_number_of_features_per_omic()
    model.compute_pvalue_for_merged_test_fold()

    # ---------------- First test set: RNA only ----------------
    rna_test_set = {
        # Omic file of the test set; it does not have to cover the same
        # omic types as the training set
        'tsv_dict': {'RNA': 'rna_dummy.tsv'},
        # Optional survival file of the test set, used to compute the
        # validation log-rank p-value
        'path_survival_file': 'survival_dummy.tsv',
        # Name under which this test set is registered
        'fname_key': 'dummy',
    }
    model.load_new_test_dataset(**rna_test_set)

    model.predict_labels_on_test_dataset()
    model.save_test_models_classes()
    model.compute_c_indexes_for_test_dataset()
    model.compute_clusters_consistency_for_test_labels()

    # Experimental: plot the test dataset amongst the class kernel densities
    model.plot_supervised_kernel_for_test_sets()
    model.plot_supervised_predicted_labels_for_test_sets()

    # ---------------- Second test set: METH only ----------------
    meth_test_set = {
        # Omic file of the second test set
        'tsv_dict': {'METH': 'meth_dummy.tsv'},
        # Survival file of the second test set
        'path_survival_file': 'survival_dummy.tsv',
        # Name under which the second test set is registered
        'fname_key': 'dummy_METH',
        # Optional metadata
        'metadata_file': "metadata_dummy.tsv",
    }
    model.load_new_test_dataset(**meth_test_set)

    model.predict_labels_on_test_dataset()
    model.compute_c_indexes_for_test_dataset()
    model.compute_clusters_consistency_for_test_labels()

    # Experimental: plot the test dataset amongst the class kernel densities
    model.plot_supervised_kernel_for_test_sets()
    model.plot_supervised_predicted_labels_for_test_sets()
# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    test_instance()