--- a +++ b/scripts/config/config_earlyfusion.yaml @@ -0,0 +1,68 @@ +save_name: "xgboost_OS" +target: "OS" + + +#### Data paths + pre-processing for each modality (e.g. simple imputation) +clinical_data: + clinical_file: "data/clinicals.csv" + imputation: + numericals: [1, 3, 4, 13, 14, 15, 16, 17, 18, 19, -4] + categoricals: [20, -5, -3] + +radiomics_data: + radiomics_file: "data/radiomics.csv" + preprocessing: + f_log_transform: ['TMTV', 'T_TMTV', 'N1_TMTV', 'N2_TMTV', 'N3_TMTV', 'M1a_TMTV', 'M1bc_TMTV'] + imputation: + numericals: [2] + categoricals: null + +pathomics_data: + pathomics_file: "data/pathomics.csv" + imputation: + numericals: [ 27, 28, 29, 31, 33, 35, 38, 40, 41, 43, 44, 45, 47, 48, 49, 52, 53, 54, 55, 56, 57, 58, 59, 60, + 61, 62, 63, 64, 65, 66, 67, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, + 131, 132, 133 ] + categoricals: null + +RNA_data: + RNA_file: "data/omics.csv" + imputation: + numericals: [32] + categoricals: null + + +#### Define classifier + hyperparameters +classifier: + type: "xgboost" + args: + n_jobs: 1 +# type: "LR" +# args: +# penalty: "elasticnet" +# max_iter: 2500 +# solver: "saga" +# class_weight: "balanced" +# C: 0.1 +# l1_ratio: 0.5 + + +#### Define earlyfusion parameters (see multipit.multi_model.earlyfusion) +earlyfusion: + n_repeats: 1OO # number of repeats for 10-fold cross-validation + seed: 3 # random seed + classifier_args: + calibration: True # if True the fused model is calibrated with inner cross-validation + balance_features: False # only availbale for classifier with 'feature_weight' parameter + select_features: True # if True univariate feature selection is used as a pre-processing step + threshold_select: null # minimum performance value for feature selection + max_features: 40 # maximum number of features to select + max_corr: 0.7 # maximum correlation threshold for feature selection + n_jobs: 1 + + +##### Additionnal parameters +parallelization: # number of jobs for dealing with several repeats in parallel (conflict with n_jobs of latefusion) + n_jobs_repeats: 1 + +collect_thresholds: False # collect threshold that optimizes log-rank test on the training set