|
a |
|
b/src/experiments/README.md |
|
|
1 |
# Experiments |
|
|
2 |
|
|
|
3 |
This module contains the code to run the experiments. Each Python file defines the experiments for one specific method. For example, the `./bilstm.py` file defines the following experiment functions: |
|
|
4 |
|
|
|
5 |
```Python |
|
|
6 |
# runs experiments on the n2c2 corpus using the BiLSTM model in a passive learning setting |
|
|
7 |
bilstm_passive_learning_n2c2() |
|
|
8 |
|
|
|
9 |
# runs experiments on the n2c2 corpus using the BiLSTM model in an active learning setting |
|
|
10 |
bilstm_active_learning_n2c2() |
|
|
11 |
|
|
|
12 |
# runs experiments on the DDI corpus using the BiLSTM model in a passive learning setting |
|
|
13 |
bilstm_passive_learning_ddi() |
|
|
14 |
|
|
|
15 |
# runs experiments on the DDI corpus using the BiLSTM model in an active learning setting |
|
|
16 |
bilstm_active_learning_ddi() |
|
|
17 |
``` |
|
|
18 |
|
|
|
19 |
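The function `bilstm_active_learning_n2c2()` runs the active learning experiment with the BiLSTM model on the n2c2 corpus by executing the following code. It repeats the experiment `REPETITIONS` times, setting a different random seed for each repetition, and runs active-learning training for every relation type and every query strategy: |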
|
|
20 |
|
|
|
21 |
```Python |
|
|
22 |
# Base Dependencies |
|
|
23 |
# ----------------- |
|
|
24 |
from copy import deepcopy |
|
|
25 |
from pathlib import Path |
|
|
26 |
from os.path import join as pjoin |
|
|
27 |
|
|
|
28 |
# Local Dependencies |
|
|
29 |
# ------------------ |
|
|
30 |
from training.config import BaalExperimentConfig |
|
|
31 |
from training.bilstm import BilstmTrainer |
|
|
32 |
from utils import set_seed |
|
|
33 |
|
|
|
34 |
# 3rd-Party Dependencies |
|
|
35 |
# ---------------------- |
|
|
36 |
from datasets import load_from_disk |
|
|
37 |
|
|
|
38 |
# Constants |
|
|
39 |
# ---------- |
|
|
40 |
from constants import ( |
|
|
41 |
N2C2_HF_TRAIN_PATH, |
|
|
42 |
N2C2_HF_TEST_PATH, |
|
|
43 |
N2C2_REL_TYPES, |
|
|
44 |
BAAL_QUERY_STRATEGIES, |
|
|
45 |
EXP_RANDOM_SEEDS |
|
|
46 |
) |
|
|
47 |
MODEL_NAME = "bilstm" |
|
|
48 |
REPETITIONS = 5 |
|
|
49 |
INITIAL_REPETITION = 0 |
|
|
50 |
FINAL_REPETITION = INITIAL_REPETITION + REPETITIONS |
|
|
51 |
|
|
|
52 |
|
|
|
53 |
# Experiment configuration |
|
|
54 |
config = BaalExperimentConfig( |
|
|
55 |
max_epoch=15, |
|
|
56 |
batch_size=32 |
|
|
57 |
) |
|
|
58 |
|
|
|
59 |
# Repetitions of the experiment |
|
|
60 |
for i in range(INITIAL_REPETITION, FINAL_REPETITION): |
|
|
61 |
|
|
|
62 |
# set random seed |
|
|
63 |
set_seed(EXP_RANDOM_SEEDS[i]) |
|
|
64 |
|
|
|
65 |
# for each relation type |
|
|
66 |
for rel_type in N2C2_REL_TYPES: |
|
|
67 |
|
|
|
68 |
# load datasets |
|
|
69 |
train_dataset = load_from_disk( |
|
|
70 |
Path(pjoin(N2C2_HF_TRAIN_PATH, MODEL_NAME, rel_type)) |
|
|
71 |
) |
|
|
72 |
test_dataset = load_from_disk( |
|
|
73 |
Path(pjoin(N2C2_HF_TEST_PATH, MODEL_NAME, rel_type)) |
|
|
74 |
) |
|
|
75 |
|
|
|
76 |
# create trainer |
|
|
77 |
trainer = BilstmTrainer( |
|
|
78 |
dataset="n2c2", |
|
|
79 |
train_dataset=train_dataset, |
|
|
80 |
test_dataset=test_dataset, |
|
|
81 |
relation_type=rel_type, |
|
|
82 |
) |
|
|
83 |
|
|
|
84 |
# for each query strategy |
|
|
85 |
for query_strategy in BAAL_QUERY_STRATEGIES: |
|
|
86 |
exp_config = deepcopy(config) |
|
|
87 |
trainer.train_active_learning(query_strategy, exp_config) |
|
|
88 |
``` |
|
|
89 |
|