Switch to unified view

a b/exseek/config/evaluate_features.yaml
1
# NOTE(review): no feature list is given here (null); presumably the consumer
# then uses every available feature — confirm against the code reading this key
features: null
2
# NOTE(review): presumably asks the loader to transpose the input matrix; the
# expected orientation is not visible in this file — confirm
transpose: true
3
4
# turn on grid search for the selector
# NOTE(review): what "selector" refers to is not defined in this file — confirm
selector_grid_search: true
5
# grid search settings for the selector; the values are identical to
# classifier_grid_search_params further down in this file
selector_grid_search_params:
6
  # cross-validation splitter used inside the grid search
  cv:
7
    splitter: StratifiedShuffleSplit
8
    n_splits: 5
9
    test_size: 0.1
10
  # NOTE(review): looks like the `iid` option of scikit-learn's GridSearchCV,
  # which newer releases no longer accept — confirm the pinned version
  iid: false
11
  scoring: roc_auc
12
13
# list of preprocessing steps; each entry is keyed by a step id and carries
# a name, a type, an enabled flag and a params mapping
# NOTE(review): presumably applied in the order listed — confirm
preprocess_steps:
14
  # apply log transformation
15
  - log_transform:
16
      name: LogTransform
17
      type: transformer
18
      enabled: true
19
      params:
20
        # NOTE(review): presumably computes log2(x + 1) from base and
        # pseudo_count — confirm against the LogTransform implementation
        base: 2
21
        pseudo_count: 1
22
  # method to scale features across samples
23
  - scale_features:
24
      name: StandardScaler
25
      type: scaler
26
      enabled: true
27
      params:
28
        with_mean: true
29
30
# template for grid_search_params in classifiers
# the per-classifier grid_search_params in this file only define param_grid
# NOTE(review): presumably the consumer fills in cv/iid/scoring from this
# template — confirm how the two are combined
31
classifier_grid_search_params:
32
  # cross-validation splitter used inside the grid search
  cv:
33
    splitter: StratifiedShuffleSplit
34
    n_splits: 5
35
    test_size: 0.1
36
  # NOTE(review): looks like the `iid` option of scikit-learn's GridSearchCV,
  # which newer releases no longer accept — confirm the pinned version
  iid: false
37
  scoring: roc_auc
38
39
# classifier configurations keyed by a short name; each entry gives the
# classifier class, optional fixed classifier_params, whether to run a grid
# search, and the param_grid to search over
classifiers:
40
  # logistic regression with an L2 penalty; C is searched over powers of ten
  # from 1e-5 to 1e5
  LogRegL2:
41
    classifier: LogisticRegression
42
    # parameters for the classifier used for feature selection
43
    classifier_params:
44
      penalty: l2
45
      solver: liblinear
46
    # grid search for hyper-parameters for the classifier
47
    grid_search: true
48
    grid_search_params:
49
      param_grid:
50
        C: [0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000, 100000]
51
  # random forest; no fixed classifier_params, searches n_estimators and
  # max_depth
  RandomForest:
52
    classifier: RandomForestClassifier
53
    grid_search: true
54
    grid_search_params:
55
      param_grid:
56
        n_estimators: [25, 50, 75]
57
        max_depth: [3, 4, 5]
58
  # SVC with an RBF kernel; searches the same C grid as LogRegL2
  RBFSVM:
59
    classifier: SVC
60
    classifier_params:
61
      kernel: rbf
62
      gamma: scale
63
    grid_search: true
64
    grid_search_params:
65
      param_grid:
66
        C: [0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000, 100000]
67
  # single decision tree; searches max_depth from 2 to 8
  DecisionTree:
68
    classifier: DecisionTreeClassifier
69
    grid_search: true
70
    grid_search_params:
71
      param_grid:
72
        max_depth: [2, 3, 4, 5, 6, 7, 8]
73
  # multi-layer perceptron; every hidden_layer_sizes candidate is a
  # one-element list, i.e. one hidden layer of 50 to 300 units
  MLP:
74
    classifier: MLPClassifier
75
    classifier_params:
76
      activation: relu
77
      solver: adam
78
      # NOTE(review): 40 iterations is low for adam and may stop training
      # before convergence — confirm this is intentional
      max_iter: 40
79
    grid_search: true
80
    grid_search_params:
81
      param_grid:
82
        hidden_layer_sizes: [[50], [100], [150], [200], [250], [300]]
83
84
# cross-validation parameters for performance evaluation
# uses 50 splits, whereas the grid search settings in this file use 5
85
cv_params:
86
  # splitter used to generate the train-test splits
  splitter: StratifiedShuffleSplit
87
  # number of train-test splits for cross-validation
88
  n_splits: 50
89
  # number or proportion of samples to use as test set
90
  test_size: 0.1
91
  # scoring metric for performance evaluation
92
  scoring: roc_auc
93
# method for computing sample weight
94
#   balanced: compute sample weight from data such that classes are balanced
# NOTE(review): other accepted values are not listed in this file — confirm
95
sample_weight: balanced