[735bb5]: / src / experiments / rf.py

Download this file

136 lines (110 with data), 4.5 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Experiments on the Random Forest model and the different datasets (i.e. n2c2, DDI)
"""
# Package Dependencies
# --------------------
from .common import final_repetition
# Local Dependencies
# ------------------
from models import RelationCollection
from training.base import ALExperimentConfig
from training.rf import RandomForestTrainer
from training.config import PLExperimentConfig, ALExperimentConfig
from utils import set_seed
# Constants
# ----------
from constants import N2C2_REL_TYPES, EXP_RANDOM_SEEDS, RFQueryStrategy
# Experiments
# -----------
def rf_passive_learning_n2c2(init_repetiton: int = 0, n_repetitions: int = 5, logging: bool = True):
    """
    Run the passive learning experiments of the Random Forest model on n2c2.

    Model: Random Forest
    Dataset: n2c2
    Learning: passive

    For every repetition, the seed stored at ``EXP_RANDOM_SEEDS[repetition]``
    is applied through ``set_seed`` and written to the experiment config.
    Afterwards one ``RandomForestTrainer`` per relation type listed in
    ``N2C2_REL_TYPES`` is built and trained.

    Args:
        init_repetiton: index of the first repetition; it is also the first
            index read from ``EXP_RANDOM_SEEDS``.
        n_repetitions: forwarded to ``final_repetition`` together with
            ``init_repetiton`` to obtain the exclusive end of the repetition
            range (presumably the number of repetitions to run).
        logging: forwarded unchanged to ``train_passive_learning``.
    """
    splits = RelationCollection.load_collections("n2c2", splits=["train", "test"])
    experiment_config = PLExperimentConfig()
    last_repetition = final_repetition(init_repetiton, n_repetitions)

    for rep_index in range(init_repetiton, last_repetition):
        # each repetition runs with its own predefined seed
        seed = EXP_RANDOM_SEEDS[rep_index]
        set_seed(seed)
        experiment_config.seed = seed

        for relation in N2C2_REL_TYPES:
            # a fresh trainer is built on the per-type subcollections
            trainer_kwargs = {
                "dataset": "n2c2",
                "train_dataset": splits["train"].type_subcollection(relation),
                "test_dataset": splits["test"].type_subcollection(relation),
                "relation_type": relation,
            }
            RandomForestTrainer(**trainer_kwargs).train_passive_learning(
                config=experiment_config, logging=logging
            )
def rf_passive_learning_ddi(init_repetiton: int = 0, n_repetitions: int = 5, logging: bool = True):
    """
    Run the passive learning experiments of the Random Forest model on DDI.

    Model: Random Forest
    Dataset: DDI
    Learning: passive

    A single ``RandomForestTrainer`` is built on the full train and test
    collections. It is then trained once per repetition, each time after
    applying the seed stored at ``EXP_RANDOM_SEEDS[repetition]``.

    Args:
        init_repetiton: index of the first repetition; it is also the first
            index read from ``EXP_RANDOM_SEEDS``.
        n_repetitions: forwarded to ``final_repetition`` together with
            ``init_repetiton`` to obtain the exclusive end of the repetition
            range (presumably the number of repetitions to run).
        logging: forwarded unchanged to ``train_passive_learning``.
    """
    splits = RelationCollection.load_collections("ddi", splits=["train", "test"])
    train_split, test_split = splits["train"], splits["test"]
    experiment_config = PLExperimentConfig()

    # NOTE(review): the trainer is created once, before any seed is set, whereas
    # rf_active_learning_ddi builds it inside the repetition loop - confirm
    # that reusing one trainer across repetitions is intended.
    rf_trainer = RandomForestTrainer(
        dataset="ddi", train_dataset=train_split, test_dataset=test_split
    )

    last_repetition = final_repetition(init_repetiton, n_repetitions)
    for rep_index in range(init_repetiton, last_repetition):
        # each repetition runs with its own predefined seed
        seed = EXP_RANDOM_SEEDS[rep_index]
        set_seed(seed)
        experiment_config.seed = seed

        rf_trainer.train_passive_learning(config=experiment_config, logging=logging)
def rf_active_learning_n2c2(init_repetiton: int = 0, n_repetitions: int = 5, logging: bool = True):
    """
    Run the active learning experiments of the Random Forest model on n2c2.

    Model: Random Forest
    Dataset: n2c2
    Learning: active

    For every repetition, the seed stored at ``EXP_RANDOM_SEEDS[repetition]``
    is applied through ``set_seed`` and written to the experiment config.
    Then, for each relation type in ``N2C2_REL_TYPES``, one
    ``RandomForestTrainer`` is built and trained once with every member of
    ``RFQueryStrategy``.

    Args:
        init_repetiton: index of the first repetition; it is also the first
            index read from ``EXP_RANDOM_SEEDS``.
        n_repetitions: forwarded to ``final_repetition`` together with
            ``init_repetiton`` to obtain the exclusive end of the repetition
            range (presumably the number of repetitions to run).
        logging: forwarded unchanged to ``train_active_learning``.
    """
    splits = RelationCollection.load_collections("n2c2", splits=["train", "test"])
    experiment_config = ALExperimentConfig()
    last_repetition = final_repetition(init_repetiton, n_repetitions)

    for rep_index in range(init_repetiton, last_repetition):
        # each repetition runs with its own predefined seed
        seed = EXP_RANDOM_SEEDS[rep_index]
        set_seed(seed)
        experiment_config.seed = seed

        for relation in N2C2_REL_TYPES:
            # one trainer per relation type, shared by all query strategies
            trainer_kwargs = {
                "dataset": "n2c2",
                "train_dataset": splits["train"].type_subcollection(relation),
                "test_dataset": splits["test"].type_subcollection(relation),
                "relation_type": relation,
            }
            rf_trainer = RandomForestTrainer(**trainer_kwargs)

            for strategy in RFQueryStrategy:
                rf_trainer.train_active_learning(strategy, experiment_config, logging=logging)
def rf_active_learning_ddi(init_repetiton: int = 0, n_repetitions: int = 5, logging: bool = True):
    """
    Run the active learning experiments of the Random Forest model on DDI.

    Model: Random Forest
    Dataset: DDI
    Learning: active

    For every repetition, the seed stored at ``EXP_RANDOM_SEEDS[repetition]``
    is applied through ``set_seed`` and written to the experiment config.
    A new ``RandomForestTrainer`` is then built on the full train and test
    collections and trained once with every member of ``RFQueryStrategy``.

    Args:
        init_repetiton: index of the first repetition; it is also the first
            index read from ``EXP_RANDOM_SEEDS``.
        n_repetitions: forwarded to ``final_repetition`` together with
            ``init_repetiton`` to obtain the exclusive end of the repetition
            range (presumably the number of repetitions to run).
        logging: forwarded unchanged to ``train_active_learning``.
    """
    splits = RelationCollection.load_collections("ddi", splits=["train", "test"])
    train_split, test_split = splits["train"], splits["test"]
    experiment_config = ALExperimentConfig()
    last_repetition = final_repetition(init_repetiton, n_repetitions)

    for rep_index in range(init_repetiton, last_repetition):
        # each repetition runs with its own predefined seed
        seed = EXP_RANDOM_SEEDS[rep_index]
        set_seed(seed)
        experiment_config.seed = seed

        # the trainer is rebuilt after seeding and shared by all query strategies
        rf_trainer = RandomForestTrainer(
            dataset="ddi", train_dataset=train_split, test_dataset=test_split
        )
        for strategy in RFQueryStrategy:
            rf_trainer.train_active_learning(strategy, experiment_config, logging=logging)