examples/example_hyperparameters_tuning.py

"""
This example details how to optimize the choice of the hyperparameters to cluster
a multi-omic dataset.
Multiple objective criteria can be used, such as model final cox-PH pvalues,
cox-PH pvalues for agglomerated out-of-bags samples, cluster consistency,
c-index for out-of-bags samples or for the full labels, mix score, or sum of the pvalues
Sum of pvalues formula:
sum_log_pval = - np.log10(1e-128 + full_model_pvalue) - np.log10(1e-128 + test_fold_pvalue)
Mix score formula:
mix_score = sum_log_pval * cluster_consistency * test_fold_cindex
"""

from os.path import abspath
from os.path import split

from simdeep.simdeep_tuning import SimDeepTuning

import ray


def test_instance():
    """
    Example of hyperparameter tuning with SimDeepTuning
    """
    PATH_DATA = '{0}/../examples/data/'.format(split(abspath(__file__))[0])

    # Input files
    TRAINING_TSV = {'RNA': 'rna_dummy.tsv', 'METH': 'meth_dummy.tsv'}
    SURVIVAL_TSV = 'survival_dummy.tsv'

    PROJECT_NAME = 'TestProjectTuning'

    nb_threads = 2  # Number of processes used to fit the individual survival models

    ### Below are examples of parameters that can be passed to the hyperparameter tuning

    ################ AUTOENCODER PARAMETERS ################
    # LEVEL_DIMS_IN = [250]
    # LEVEL_DIMS_OUT = [250]
    # LOSS = 'binary_crossentropy'
    # OPTIMIZER = 'adam'
    # ACT_REG = 0
    # W_REG = 0
    # DROPOUT = 0.5
    # DATA_SPLIT = 0
    # ACTIVATION = 'tanh'
    #########################################################

    ################ ADDITIONAL PARAMETERS ##################
    # stack_multi_omic=STACK_MULTI_OMIC,
    # level_dims_in=LEVEL_DIMS_IN,
    # level_dims_out=LEVEL_DIMS_OUT,
    # loss=LOSS,
    # optimizer=OPTIMIZER,
    # act_reg=ACT_REG,
    # w_reg=W_REG,
    # dropout=DROPOUT,
    # data_split=DATA_SPLIT,
    # activation=ACTIVATION,
    # path_to_save_model=PATH_TO_SAVE_MODEL,
    # pvalue_threshold=PVALUE_THRESHOLD,
    # nb_selected_features=NB_SELECTED_FEATURES,
    # pvalue_threshold = 0.01
    # nb_selected_features = 10
    # stack_multi_omic = False
    # use_autoencoders = True
    # feature_surv_analysis = True
    #########################################################
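
    # Any of the parameters above can also be searched over by adding an
    # entry for it to the args_to_optimize dict defined below, for instance
    # 'dropout': [0.1, 0.5] (assuming the parameter is accepted by the
    # underlying models, as suggested by the commented lists above).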

    # ray.init(num_cpus=3)

    # AgglomerativeClustering is an external class that can be used as a
    # clustering algorithm, since it exposes a fit_predict method (see the
    # sketch after the dict below)
    from sklearn.cluster import AgglomerativeClustering

    args_to_optimize = {
        'seed': [100, 200, 300, 400],
        # 'nb_clusters': [2, 5],
        'cluster_method': [
            'mixture',       # Gaussian mixture
            'coxPHM',        # Cox-PH fitting and dichotomization
            'coxPHMixture',  # Cox-PH fitting and Gaussian mixture on the predicted time (1D)
            AgglomerativeClustering,  # scikit-learn hierarchical clustering
        ],
        'normalization': ['default', 'alternative'],
        # 'use_autoencoders': (True, False),
        # 'class_selection': ('mean', 'max'),
    }
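
    # Any class exposing a fit_predict(matrix) -> labels method should work
    # in 'cluster_method'. A minimal sketch with a hypothetical custom class
    # (illustration only, not part of simdeep):
    #
    # import numpy as np
    #
    # class MedianDichotomizer(object):
    #     """Split samples on the median of the first latent dimension."""
    #
    #     def fit_predict(self, matrix):
    #         matrix = np.asarray(matrix)
    #         return (matrix[:, 0] > np.median(matrix[:, 0])).astype(int)
    #
    # args_to_optimize['cluster_method'].append(MedianDichotomizer)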

    # Different normalizations can be tested
    from sklearn.preprocessing import RobustScaler

    # An external normalization class can also be used;
    # it only needs to provide fit and fit_transform methods
    normalization = {
        'default': {
            'NB_FEATURES_TO_KEEP': 100,
            'TRAIN_RANK_NORM': True,
            'TRAIN_CORR_REDUCTION': True,
            'TRAIN_CORR_RANK_NORM': True,
        },
        'alternative': {
            'CUSTOM': RobustScaler,
        },
    }
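
    # A minimal sketch of such a custom normalization class (hypothetical,
    # mirroring the scikit-learn transformer interface; illustration only):
    #
    # import numpy as np
    #
    # class UnitRowNorm(object):
    #     """Scale each sample (row) to unit L2 norm."""
    #
    #     def fit(self, matrix):
    #         return self
    #
    #     def fit_transform(self, matrix):
    #         matrix = np.asarray(matrix, dtype=float)
    #         norms = np.linalg.norm(matrix, axis=1, keepdims=True)
    #         return matrix / np.maximum(norms, 1e-12)
    #
    # normalization['unit_row'] = {'CUSTOM': UnitRowNorm}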

    tuning = SimDeepTuning(
        args_to_optimize=args_to_optimize,
        nb_threads=nb_threads,
        survival_tsv=SURVIVAL_TSV,
        training_tsv=TRAINING_TSV,
        path_data=PATH_DATA,
        project_name=PROJECT_NAME,
        path_results=PATH_DATA,
        normalization=normalization,
    )

    # Possible metrics for evaluating the training set: {
    #     "cluster_consistency",
    #     "full_pvalue",
    #     "sum_log_pval",
    #     "test_fold_cindex",
    #     "test_fold_pval",
    #     "mix_score",
    # }

    ray.init(num_cpus=8)

    tuning.fit(
        metric='log_test_fold_pvalue',
        num_samples=8,
        max_concurrent=2,
        distribute_deepprog=True,
        # iterations is useful to account for the variability of the
        # deep-learning parameter fitting
        iterations=1)

    table = tuning.get_results_table()
    tuning.save_results_table()
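
    # The results table gathers one row per sampled configuration. Assuming
    # it behaves like a pandas DataFrame with a column named after the chosen
    # metric (an assumption; check the saved table for the actual columns):
    #
    # print(table.sort_values('log_test_fold_pvalue', ascending=False).head())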

    ray.shutdown()


if __name__ == '__main__':
    test_instance()