# Source file: /src/config/config.py
1
"""
Hyperparameter search-space configuration.
"""

import numpy as np
import pandas as pd
from ray import tune

# Candidate values shared by the search spaces defined below.
N_SAMPLES = [256]                                 # memory/pattern sample counts
DECAY_WEIGHTS = [0.2, 0.4, 0.6, 0.8, 0.9, 1]      # decay / strength factors in (0, 1]
TEMPERATURES = [0.5, 1.0, 1.5, 2.0, 2.5, 3.0]     # distillation temperatures
LOG_WEIGHTS = [1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2]   # log-spaced regularization weights
HIDDEN_DIMS = [64, 128, 256]
N_LAYERS = [3, 4]
N_HEADS = [12, 16, 24]                            # Transformer attention heads
# Conditional hyper-param functions
def get_dropout_from_n_layers(spec):
    """Sample a dropout rate conditioned on the trial's layer count.

    Intended for ``tune.sample_from``: Ray Tune passes the resolved trial
    ``spec``, and this reads ``spec.config.model.n_layers``.

    Args:
        spec: Resolved trial spec (must expose ``spec.config.model.n_layers``).

    Returns:
        ``0`` for single-layer models (dropout between layers is meaningless
        there), otherwise a random choice from ``{0, 0.1, 0.2, 0.4}``.
    """
    if spec.config.model.n_layers == 1:
        return 0
    # NOTE(review): with N_LAYERS = [3, 4] the n_layers == 1 branch is
    # currently unreachable; kept for safety if the grid changes.
    return np.random.choice([0, 0.1, 0.2, 0.4])
# Hyperparameter search-space
# Training hyperparameters shared by every model / strategy combination.
config_generic = {
    "lr": tune.grid_search([1e-4, 1e-3, 1e-2]),
    "optimizer": "SGD",  # tune.choice(['Adam','SGD']),
    "momentum": 0.9,  # tune.grid_search(DECAY_WEIGHTS),
    "train_epochs": 100,
    "train_mb_size": tune.grid_search([16, 32, 64, 128]),
}
# Per-architecture model hyperparameter grids, keyed by model name.
# "dropout" is conditioned on the sampled n_layers via get_dropout_from_n_layers.
config_model = {
    "CNN": {
        "hidden_dim": tune.grid_search(HIDDEN_DIMS),
        "n_layers": tune.grid_search(N_LAYERS),
        "kernel_size": tune.grid_search([3, 5, 7]),
        "nonlinearity": tune.grid_search(["tanh", "relu"]),
    },
    "MLP": {
        "hidden_dim": tune.grid_search(HIDDEN_DIMS),
        "n_layers": tune.grid_search(N_LAYERS),
        "dropout": tune.sample_from(get_dropout_from_n_layers),
        "nonlinearity": tune.grid_search(["tanh", "relu"]),
    },
    "Transformer": {
        "hidden_dim": tune.grid_search(HIDDEN_DIMS),
        "n_layers": tune.grid_search(N_LAYERS),
        "n_heads": tune.grid_search(N_HEADS),
        "dropout": tune.sample_from(get_dropout_from_n_layers),
        "nonlinearity": tune.grid_search(["relu", "gelu"]),
    },
    "RNN": {
        "hidden_dim": tune.grid_search(HIDDEN_DIMS),
        "n_layers": tune.grid_search(N_LAYERS),
        "dropout": tune.sample_from(get_dropout_from_n_layers),
        "nonlinearity": tune.grid_search(["tanh", "relu"]),
        "bidirectional": tune.grid_search([True, False]),
    },
    "LSTM": {
        "hidden_dim": tune.grid_search(HIDDEN_DIMS),
        "n_layers": tune.grid_search(N_LAYERS),
        "dropout": tune.sample_from(get_dropout_from_n_layers),
        "bidirectional": tune.grid_search([True, False]),
    },
    "GRU": {
        "hidden_dim": tune.grid_search(HIDDEN_DIMS),
        "n_layers": tune.grid_search(N_LAYERS),
        "dropout": tune.sample_from(get_dropout_from_n_layers),
        "bidirectional": tune.grid_search([True, False]),
    },
}
# Continual-learning strategy hyperparameter grids, keyed by strategy name.
config_cl = {
    "Replay": {
        "mem_size": tune.grid_search([5 * n for n in N_SAMPLES])
        # JA: Have edited this in replay definition
        #'storage_policy':storage_policy.ClassBalancedStoragePolicy()
    },
    "GDumb": {"mem_size": tune.grid_search(N_SAMPLES)},
    "EWC": {"mode": "separate", "ewc_lambda": tune.grid_search(LOG_WEIGHTS)},
    "OnlineEWC": {
        "mode": "online",
        "ewc_lambda": tune.grid_search(LOG_WEIGHTS),
        "decay_factor": tune.grid_search(DECAY_WEIGHTS),
    },
    "SI": {"si_lambda": tune.grid_search(LOG_WEIGHTS)},
    "LwF": {
        "alpha": tune.grid_search(LOG_WEIGHTS),
        "temperature": tune.grid_search(TEMPERATURES),
    },
    # BUG FIX: was grid_search([LOG_WEIGHTS]) — a single grid point whose value
    # was the entire list — now searches each weight, matching every other
    # strategy and the `vals` summary table below.
    # NOTE(review): Avalanche's LFL also accepts a per-experience list for
    # lambda_e; confirm a scalar per trial is what was intended.
    "LFL": {"lambda_e": tune.grid_search(LOG_WEIGHTS)},
    "GEM": {
        "patterns_per_exp": tune.grid_search(N_SAMPLES),
        "memory_strength": tune.grid_search(DECAY_WEIGHTS),
    },
    "AGEM": {
        "patterns_per_exp": tune.grid_search(N_SAMPLES),
        "sample_size": tune.grid_search([i * max(N_SAMPLES) for i in range(1, 3)]),
    },
    #'CoPE':
}
# Strategy hyperparameter -> candidate values, used only to render the
# LaTeX summary table below (mirrors the grids in config_cl).
vals = {
    "mem_size": N_SAMPLES,
    "ewc_lambda": LOG_WEIGHTS,
    "decay_factor": DECAY_WEIGHTS,
    "si_lambda": LOG_WEIGHTS,
    "alpha": LOG_WEIGHTS,
    "temperature": TEMPERATURES,
    "lambda_e": LOG_WEIGHTS,
    "patterns_per_exp": N_SAMPLES,
    "memory_strength": DECAY_WEIGHTS,
    "sample_size": [i * max(N_SAMPLES) for i in range(1, 3)],
}
# Model hyperparameter -> candidate values for the second summary table.
# Values are display strings/annotations ("gelu*", "True"/"False"), not the
# objects actually searched — table rendering only.
vals2 = {
    "hidden_dim": HIDDEN_DIMS,
    "n_layers": N_LAYERS,
    "nonlinearity": ["tanh", "relu", "gelu*"],
    "n_heads": N_HEADS,
    "bidirectional": ["True", "False"],
}
models = ["MLP", "CNN", "LSTM", "Transformer"]
# Extend each vals2 entry to (values, used_by_MLP, used_by_CNN, ...) flags,
# recording which model grids in config_model include that hyperparameter.
for k in vals2.keys():
    vals2[k] = (vals2[k], *[k in config_model[model] for model in models])
# LaTeX summary tables (printing left commented out).
# NOTE(review): df_hp is built twice; the first table is overwritten by the
# second, so only the model-hyperparameter table survives at module level.
df_hp = pd.DataFrame(vals.items(), columns=["Hyperparameter", "Values"])
df_hp = df_hp.set_index(["Hyperparameter"])

# print(df_hp.to_latex())

df_hp = pd.DataFrame(
    ((k, *v) for k, v in vals2.items()), columns=["Hyperparameter", "Values", *models]
)
df_hp = df_hp.set_index(["Hyperparameter"])

# print(df_hp.to_latex())