|
a |
|
b/src/config/config.py |
|
|
1 |
"""
Hyperparameter search-space configuration.
"""
|
|
4 |
|
|
|
5 |
from ray import tune |
|
|
6 |
import numpy as np |
|
|
7 |
import pandas as pd |
|
|
8 |
|
|
|
9 |
# Candidate value lists shared by the search spaces defined in this module.

# Sample counts (used for mem_size, patterns_per_exp and sample_size).
N_SAMPLES = [
    256,
]

# Weights used for decay_factor and memory_strength.
DECAY_WEIGHTS = [
    0.2,
    0.4,
    0.6,
    0.8,
    0.9,
    1,
]

# Temperatures used for the "temperature" entry.
TEMPERATURES = [
    0.5,
    1.0,
    1.5,
    2.0,
    2.5,
    3.0,
]

# Log-spaced weights from 1e-3 up to 1e2 (lambda / alpha style entries).
LOG_WEIGHTS = [
    0.001,
    0.01,
    0.1,
    1.0,
    10.0,
    100.0,
]

# Model architecture grids.
HIDDEN_DIMS = [
    64,
    128,
    256,
]
N_LAYERS = [
    3,
    4,
]
# NOTE(review): 12 and 24 do not divide any value in HIDDEN_DIMS; confirm the
# Transformer does not require hidden_dim % n_heads == 0.
N_HEADS = [
    12,
    16,
    24,
]
|
|
16 |
|
|
|
17 |
|
|
|
18 |
# Conditional hyperparameter functions
|
|
19 |
def get_dropout_from_n_layers(spec):
    """
    Pick a dropout value conditioned on the number of layers.

    Reads ``spec.config.model.n_layers``. A single-layer model gets a
    dropout of 0; any other depth gets one of 0, 0.1, 0.2 or 0.4 drawn
    with ``np.random.choice``.
    """
    depth = spec.config.model.n_layers
    if depth != 1:
        return np.random.choice([0, 0.1, 0.2, 0.4])
    return 0
|
|
28 |
|
|
|
29 |
|
|
|
30 |
# Hyperparameter search space
|
|
31 |
# Training settings: learning rate and mini-batch size are grid-searched,
# while optimizer, momentum and epoch count are fixed values.
config_generic = dict(
    lr=tune.grid_search([1e-4, 1e-3, 1e-2]),
    optimizer="SGD",  # alternative: tune.choice(['Adam','SGD'])
    momentum=0.9,  # alternative: tune.grid_search(DECAY_WEIGHTS)
    train_epochs=100,
    train_mb_size=tune.grid_search([16, 32, 64, 128]),
)
|
|
38 |
|
|
|
39 |
# Per-model search spaces, keyed by model name. Each entry maps a
# hyperparameter name to a Ray Tune search-space object; "dropout" is
# sampled through get_dropout_from_n_layers rather than grid-searched.
config_model = dict(
    CNN=dict(
        hidden_dim=tune.grid_search(HIDDEN_DIMS),
        n_layers=tune.grid_search(N_LAYERS),
        kernel_size=tune.grid_search([3, 5, 7]),
        nonlinearity=tune.grid_search(["tanh", "relu"]),
    ),
    MLP=dict(
        hidden_dim=tune.grid_search(HIDDEN_DIMS),
        n_layers=tune.grid_search(N_LAYERS),
        dropout=tune.sample_from(get_dropout_from_n_layers),
        nonlinearity=tune.grid_search(["tanh", "relu"]),
    ),
    Transformer=dict(
        hidden_dim=tune.grid_search(HIDDEN_DIMS),
        n_layers=tune.grid_search(N_LAYERS),
        n_heads=tune.grid_search(N_HEADS),
        dropout=tune.sample_from(get_dropout_from_n_layers),
        nonlinearity=tune.grid_search(["relu", "gelu"]),
    ),
    RNN=dict(
        hidden_dim=tune.grid_search(HIDDEN_DIMS),
        n_layers=tune.grid_search(N_LAYERS),
        dropout=tune.sample_from(get_dropout_from_n_layers),
        nonlinearity=tune.grid_search(["tanh", "relu"]),
        bidirectional=tune.grid_search([True, False]),
    ),
    LSTM=dict(
        hidden_dim=tune.grid_search(HIDDEN_DIMS),
        n_layers=tune.grid_search(N_LAYERS),
        dropout=tune.sample_from(get_dropout_from_n_layers),
        bidirectional=tune.grid_search([True, False]),
    ),
    GRU=dict(
        hidden_dim=tune.grid_search(HIDDEN_DIMS),
        n_layers=tune.grid_search(N_LAYERS),
        dropout=tune.sample_from(get_dropout_from_n_layers),
        bidirectional=tune.grid_search([True, False]),
    ),
)
|
|
79 |
|
|
|
80 |
# Per-strategy search spaces, keyed by strategy name (presumably the
# continual-learning strategies used elsewhere in the project). Each entry
# maps a keyword name to either a fixed value or a Ray Tune grid.
config_cl = {
    "Replay": {
        # Memory holds five times each candidate sample count.
        "mem_size": tune.grid_search([5 * n for n in N_SAMPLES])
        # JA: Have edited this in replay definition
        #'storage_policy':storage_policy.ClassBalancedStoragePolicy()
    },
    "GDumb": {"mem_size": tune.grid_search(N_SAMPLES)},
    "EWC": {"mode": "separate", "ewc_lambda": tune.grid_search(LOG_WEIGHTS)},
    "OnlineEWC": {
        "mode": "online",
        "ewc_lambda": tune.grid_search(LOG_WEIGHTS),
        "decay_factor": tune.grid_search(DECAY_WEIGHTS),
    },
    "SI": {"si_lambda": tune.grid_search(LOG_WEIGHTS)},
    "LwF": {
        "alpha": tune.grid_search(LOG_WEIGHTS),
        "temperature": tune.grid_search(TEMPERATURES),
    },
    # Pass the flat list: wrapping it in another list would make the grid a
    # single point whose value is the whole LOG_WEIGHTS list.
    "LFL": {"lambda_e": tune.grid_search(LOG_WEIGHTS)},
    "GEM": {
        "patterns_per_exp": tune.grid_search(N_SAMPLES),
        "memory_strength": tune.grid_search(DECAY_WEIGHTS),
    },
    "AGEM": {
        "patterns_per_exp": tune.grid_search(N_SAMPLES),
        # One and two times the largest candidate sample count.
        "sample_size": tune.grid_search([i * max(N_SAMPLES) for i in range(1, 3)]),
    },
    #'CoPE':
}
|
|
109 |
|
|
|
110 |
# Candidate values per strategy hyperparameter, in plain-list form; used
# further down to build a summary DataFrame.
vals = dict(
    mem_size=N_SAMPLES,
    ewc_lambda=LOG_WEIGHTS,
    decay_factor=DECAY_WEIGHTS,
    si_lambda=LOG_WEIGHTS,
    alpha=LOG_WEIGHTS,
    temperature=TEMPERATURES,
    lambda_e=LOG_WEIGHTS,
    patterns_per_exp=N_SAMPLES,
    memory_strength=DECAY_WEIGHTS,
    # One and two times the largest candidate sample count.
    sample_size=[factor * max(N_SAMPLES) for factor in (1, 2)],
)
|
|
122 |
|
|
|
123 |
# Candidate values per model hyperparameter, in display form (note the
# string entries for "bidirectional" and the "gelu*" label).
vals2 = dict(
    hidden_dim=HIDDEN_DIMS,
    n_layers=N_LAYERS,
    nonlinearity=["tanh", "relu", "gelu*"],
    n_heads=N_HEADS,
    bidirectional=["True", "False"],
)

# Models that get a column in the summary table.
models = ["MLP", "CNN", "LSTM", "Transformer"]

# Turn each entry into (values, flag, flag, ...), one boolean per model in
# `models`, telling whether that model's search space has the hyperparameter.
for k in list(vals2):
    vals2[k] = (vals2[k], *(k in config_model[model] for model in models))
|
|
134 |
|
|
|
135 |
# Summary table of strategy hyperparameters: one row per entry of `vals`,
# indexed by hyperparameter name.
df_hp = pd.DataFrame(
    vals.items(),
    columns=["Hyperparameter", "Values"],
).set_index(["Hyperparameter"])

# print(df_hp.to_latex())

# Summary table of model hyperparameters: values plus one boolean column per
# model. Rebinding df_hp replaces the table built above.
df_hp = pd.DataFrame(
    [(k, *v) for k, v in vals2.items()],
    columns=["Hyperparameter", "Values", *models],
).set_index(["Hyperparameter"])

# print(df_hp.to_latex())