multipit / Git / Diff of /scripts/config/config

Models:
RichardZick/
multipit
Downloads: 1
Diff of /scripts/config/config_earlyfusion.yaml [000000] .. [efd906]
Switch to side-by-side view

--- a
+++ b/scripts/config/config_earlyfusion.yaml
@@ -0,0 +1,68 @@
+save_name: "xgboost_OS"
+target: "OS"
+
+
+#### Data paths + pre-processing for each modality (e.g. simple imputation)
+clinical_data:
+  clinical_file: "data/clinicals.csv"
+  imputation:
+    numericals: [1, 3, 4, 13, 14, 15, 16, 17, 18, 19, -4]
+    categoricals: [20, -5, -3]
+
+radiomics_data:
+  radiomics_file: "data/radiomics.csv"
+  preprocessing:
+    f_log_transform: ['TMTV', 'T_TMTV', 'N1_TMTV', 'N2_TMTV', 'N3_TMTV', 'M1a_TMTV', 'M1bc_TMTV']
+  imputation:
+    numericals: [2]
+    categoricals: null
+
+pathomics_data:
+  pathomics_file: "data/pathomics.csv"
+  imputation:
+    numericals: [ 27, 28, 29, 31, 33, 35, 38, 40, 41, 43, 44, 45, 47, 48, 49, 52, 53, 54, 55, 56, 57, 58, 59, 60,
+                  61, 62, 63, 64, 65, 66, 67, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130,
+                  131, 132, 133 ]
+    categoricals: null
+
+RNA_data:
+  RNA_file: "data/omics.csv"
+  imputation:
+    numericals: [32]
+    categoricals: null
+
+
+####  Define classifier + hyperparameters
+classifier:
+  type: "xgboost"
+  args:
+    n_jobs: 1
+#  type: "LR"
+#  args:
+#    penalty: "elasticnet"
+#    max_iter: 2500
+#    solver: "saga"
+#    class_weight: "balanced"
+#    C: 0.1
+#    l1_ratio: 0.5
+
+
+#### Define earlyfusion parameters (see multipit.multi_model.earlyfusion)
+earlyfusion:
+  n_repeats: 1OO # number of repeats for 10-fold cross-validation
+  seed: 3 # random seed
+  classifier_args:
+    calibration: True # if True the fused model is calibrated with inner cross-validation
+    balance_features: False # only availbale for classifier with 'feature_weight' parameter
+    select_features: True # if True univariate feature selection is used as a pre-processing step
+    threshold_select: null # minimum performance value for feature selection
+    max_features: 40 # maximum number of features to select
+    max_corr: 0.7 # maximum correlation threshold for feature selection
+    n_jobs: 1
+
+
+##### Additionnal parameters
+parallelization: # number of jobs for dealing with several repeats in parallel (conflict with n_jobs of latefusion)
+  n_jobs_repeats: 1
+
+collect_thresholds: False # collect threshold that optimizes log-rank test on the training set