|
a |
|
b/src/evaluation/tables.py |
|
|
1 |
# Base Dependencies |
|
|
2 |
# ----------------- |
|
|
3 |
import re |
|
|
4 |
import numpy as np |
|
|
5 |
from pathlib import Path |
|
|
6 |
from os.path import join as pjoin |
|
|
7 |
|
|
|
8 |
# Local Dependencies |
|
|
9 |
# ------------------ |
|
|
10 |
from evaluation.io import ( |
|
|
11 |
collect_step_times, |
|
|
12 |
collect_annotation_rates, |
|
|
13 |
collect_step_times_sum, |
|
|
14 |
) |
|
|
15 |
|
|
|
16 |
# 3rd-Party Dependencies |
|
|
17 |
# ---------------------- |
|
|
18 |
import pandas as pd |
|
|
19 |
from tabulate import tabulate |
|
|
20 |
|
|
|
21 |
# Constants |
|
|
22 |
# --------- |
|
|
23 |
from constants import ( |
|
|
24 |
N2C2_REL_TYPES, |
|
|
25 |
N2C2_REL_TEST_WEIGHTS, |
|
|
26 |
METHODS_NAMES, |
|
|
27 |
) |
|
|
28 |
from evaluation.explainability.random_forest import FEATURE_LABELS |
|
|
29 |
|
|
|
30 |
# Maps each query strategy to the numeric superscript used by _fcell_al
# when formatting Active Learning table cells.
FORMAT_STRATEGY = {"random": 1, "LC": 2, "BatchLC": 3, "BatchBALD": 4}
|
|
31 |
|
|
|
32 |
|
|
|
33 |
def _fcell_ar(mean: float, std: float, decimals: int = 3) -> str: |
|
|
34 |
"""Formats a cell of an Annotation Rate table""" |
|
|
35 |
value = "{:2.2f} +- {:2.2f}".format(round(mean, decimals), round(std, decimals)) |
|
|
36 |
value = re.sub(r"^0\.", ".", value) |
|
|
37 |
return value |
|
|
38 |
|
|
|
39 |
|
|
|
40 |
def _fcell_pl(mean: float, std: float, decimals: int = 3) -> str: |
|
|
41 |
"""Formats a cell of a Passive Learning table |
|
|
42 |
|
|
|
43 |
Args: |
|
|
44 |
mean (float): mean value |
|
|
45 |
std (float): standard deviation |
|
|
46 |
decimals (int, optional): number of decimals to represent the values. Defaults to 3. |
|
|
47 |
|
|
|
48 |
Returns: |
|
|
49 |
str: formatted cell content |
|
|
50 |
""" |
|
|
51 |
value = "{:0.3f}+-{:0.2f}".format(round(mean, decimals), round(std, decimals)) |
|
|
52 |
value = re.sub(r"^0\.", ".", value) |
|
|
53 |
return value |
|
|
54 |
|
|
|
55 |
|
|
|
56 |
def _fcell_al(mean: float, std: float, strategy: str, decimals: int = 3) -> str:
    """Formats a cell of an Active Learning table

    Args:
        mean (float): mean value
        std (float): standard deviation
        strategy (str): query strategy that obtain the (best) performance
        decimals (int, optional): number of decimals to represent the values. Defaults to 3.

    Returns:
        str: formatted cell content
    """
    superscript = FORMAT_STRATEGY[strategy]
    cell = "{:0.3f}+-{:0.2f} superscript{}".format(
        round(mean, decimals), round(std, decimals), superscript
    )
    # unanchored: removes the "0." prefix of both the mean and the std
    return re.sub(r"0\.", ".", cell)
|
|
73 |
|
|
|
74 |
|
|
|
75 |
def rename_strategy(strategy: str) -> str:
    """Renames the strategy to be displayed in the table."""
    # the two batch-mode strategies share a single display label
    return "BatchLC / BatchBALD" if strategy in ("BatchLC", "BatchBALD") else strategy
|
|
81 |
|
|
|
82 |
|
|
|
83 |
# Main Functions |
|
|
84 |
# -------------- |
|
|
85 |
def pl_table_ddi():
    """Generates the results table for passive learning training on the DDI Extraction corpus.

    Reads each method's passive-learning results CSV, averages the F1 columns
    over the experiment runs and prints a LaTeX table (prepended with
    related-work rows) to stdout.
    """
    TABLE_HEADERS = [
        "Method",
        "Detection",
        "Effect",
        "Mechanism",
        "Advise",
        "Interaction",
        "Macro",
        "Micro",
    ]

    # F1 columns, in the same order as the headers above
    F1_COLUMNS = [
        "DETECT_f1",
        "EFFECT_f1",
        "MECHANISM_f1",
        "ADVISE_f1",
        "INT_f1",
        "Macro_f1",
        "Micro_f1",
    ]

    # table with related work
    table = [
        ["Chowdhury et al.", ".800", ".628", ".679", ".692", ".547", ".648", ".651"],
        ["Quan et al.", ".790", ".682", ".722", ".780", ".510", ".674", ".702"],
    ]

    for method in METHODS_NAMES:
        # read method's results
        path = Path(pjoin("results", "ddi", method, "passive learning", "results.csv"))
        df = pd.read_csv(path)

        # keep only the F1 columns
        df = df[F1_COLUMNS]

        # compute mean and standard deviation over the experiment runs
        means = df.mean(axis=0)
        stds = df.std(axis=0)

        # one formatted cell per metric (zip avoids positional Series indexing,
        # which is deprecated and previously shadowed the outer loop variable)
        row = [METHODS_NAMES[method]]
        row.extend(_fcell_pl(mean, std) for mean, std in zip(means, stds))
        table.append(row)

    print(tabulate(table, headers=TABLE_HEADERS, tablefmt="latex"))
|
|
130 |
|
|
|
131 |
|
|
|
132 |
def pl_table_n2c2():
    """Generates the results table for the passive learning training on the n2c2 corpus.

    Collects the per-relation F1 scores of every method's passive-learning
    runs, averages them and prints a LaTeX table (prepended with related-work
    rows) to stdout.
    """
    TABLE_HEADERS = [
        "Method",
        "Strength",
        "Duration",
        "Route",
        "Form",
        "ADE",
        "Dosage",
        "Reason",
        "Frequency",
        "Macro",
        "Micro",
    ]

    # table with the related work
    table = [
        ["Xu et al.", "-", "-", "-", "-", "-", "-", "-", "-", "-", ".965"],
        [
            "Alimova et al.",
            ".875",
            ".769",
            ".896",
            ".843",
            ".696",
            ".874",
            ".716",
            ".843",
            ".814",
            ".852",
        ],
        [
            "Wei et al. ",
            ".985",
            ".892",
            ".972",
            ".975",
            ".812",
            ".971",
            ".767",
            ".964",
            ".917",
            "-",
        ],
    ]

    for method in METHODS_NAMES:
        all_experiments = pd.DataFrame()
        df_method = pd.read_csv(
            Path(
                pjoin(
                    "results", "n2c2", "all", method, "passive learning", "results.csv"
                )
            )
        )

        # get results for each relation type (concat tolerates runs with a
        # different number of experiments per relation, padding with NaN)
        for rel_type in N2C2_REL_TYPES + ["Macro", "Micro"]:
            df_relation = df_method[df_method["relation"] == rel_type]
            relation_column = pd.DataFrame({rel_type: df_relation["f1"].values})
            all_experiments = pd.concat([all_experiments, relation_column], axis=1)

        # add method's row to latex table
        means = all_experiments.mean(axis=0)
        stds = all_experiments.std(axis=0)

        row = [METHODS_NAMES[method]]
        row.extend(_fcell_pl(mean, std) for mean, std in zip(means, stds))
        table.append(row)

    print(tabulate(table, headers=TABLE_HEADERS, tablefmt="latex"))
|
|
205 |
|
|
|
206 |
|
|
|
207 |
def al_table_ddi():
    """Generates the results table for the active learning training on the DDI Extraction corpus.

    For every method, picks — per metric — the query strategy with the best
    mean score over the runs, and prints the resulting LaTeX table to stdout.
    Methods without an active-learning results file are skipped.
    """
    TABLE_HEADERS = [
        "Method",
        "Detection",
        "Effect",
        "Mechanism",
        "Advise",
        "Interaction",
        "Macro",
        "Micro",
    ]

    metrics = [
        "DETECT_f1 (max)",
        "EFFECT_f1 (max)",
        "MECHANISM_f1 (max)",
        "ADVISE_f1 (max)",
        "INT_f1 (max)",
        "Macro_f1 (max)",
        "Micro_f1 (max)",
    ]
    table = []
    for method in METHODS_NAMES:
        # load results (skip methods without active-learning runs)
        path = Path(pjoin("results", "ddi", method, "active learning", "results.csv"))
        if not path.is_file():
            continue
        df = pd.read_csv(path)

        # sort results by creation time
        df = df.sort_values(by=["Creation Time"])

        # discard unnecessary columns
        df = df[["strategy"] + metrics]

        # get means and stds of the runs, grouped per query strategy
        means = df.groupby(["strategy"], as_index=False).mean()
        stds = df.groupby(["strategy"], as_index=False).std()

        # add method's row to latex table: best strategy per metric
        row = [METHODS_NAMES[method]]
        for metric in metrics:
            try:
                idxmax = means[metric].idxmax()
                mean = means.iloc[idxmax][metric]
                std = stds.iloc[idxmax][metric]
                strategy = means.iloc[idxmax]["strategy"]

                row.append(_fcell_al(mean, std, strategy))
            except TypeError:
                # non-numeric / missing metric column: leave the cell blank
                row.append("-")
        table.append(row)

    # print table
    print(tabulate(table, headers=TABLE_HEADERS, tablefmt="latex"))
|
|
263 |
|
|
|
264 |
|
|
|
265 |
def al_table_n2c2():
    """Generates the results table for the active learning training on the n2c2 corpus.

    For every method, picks — per relation type — the query strategy with the
    best mean "f1 (max)" over the runs, and prints the resulting LaTeX table
    to stdout. Methods without an active-learning results file are skipped.
    """

    TABLE_HEADERS = ["Method"] + N2C2_REL_TYPES + ["Macro", "Micro"]
    metric = "f1 (max)"
    table = []
    for method in METHODS_NAMES:
        # load results (skip methods without active-learning runs)
        path = Path(
            pjoin("results", "n2c2", "all", method, "active learning", "results.csv")
        )

        if not path.is_file():
            continue
        df = pd.read_csv(path)

        # sort results by creation time and relation type
        df = df.sort_values(by=["relation", "Creation Time"])

        # discard unnecessary columns
        df = df[["strategy", "relation", metric]]

        # get means and stds of the runs, grouped per (relation, strategy)
        means = df.groupby(["relation", "strategy"], as_index=False).mean()
        stds = df.groupby(["relation", "strategy"], as_index=False).std()

        # for each relation keep the strategy with the best mean score and
        # append its formatted cell directly (the original built three
        # parallel lists and zipped them afterwards)
        row = [METHODS_NAMES[method]]
        for relation in N2C2_REL_TYPES + ["Macro", "Micro"]:
            # as_index=False yields a RangeIndex, so the idxmax label is also
            # a valid position for .iloc
            idxmax = means[means["relation"] == relation][metric].idxmax()
            mean = means.iloc[idxmax][metric]
            std = stds.iloc[idxmax][metric]
            strategy = means.iloc[idxmax]["strategy"]

            row.append(_fcell_al(mean, std, strategy))
        table.append(row)

    # print table
    print(tabulate(table, headers=TABLE_HEADERS, tablefmt="latex"))
|
|
313 |
|
|
|
314 |
|
|
|
315 |
def al_improvements_table_n2c2():
    """Generates the improvements table for the active learning training on the n2c2 corpus.

    For every method, prints (in markdown) the improvement — in F1 percentage
    points — of each active-learning query strategy over the passive-learning
    baseline, per relation type.
    """
    TABLE_HEADERS = [
        "Strategy",
        "Strength",
        "Duration",
        "Route",
        "Form",
        "ADE",
        "Dosage",
        "Reason",
        "Frequency",
        "Macro",
        "Micro",
    ]

    for method in METHODS_NAMES:
        table = []
        pl_results = pd.read_csv(
            Path(
                pjoin(
                    "results", "n2c2", "all", method, "passive learning", "results.csv"
                )
            )
        )
        al_results = pd.read_csv(
            Path(
                pjoin(
                    "results", "n2c2", "all", method, "active learning", "results.csv"
                )
            )
        )

        # sort results by creation time and relation type
        al_results = al_results.sort_values(by=["relation", "Creation Time"])

        # discard unnecessary columns
        al_results = al_results[["strategy", "relation", "f1 (max)"]]

        # get results for each relation type
        for strategy in al_results["strategy"].unique():
            row = [strategy]
            for rel_type in N2C2_REL_TYPES + ["Macro", "Micro"]:
                pl_score = pl_results.loc[
                    pl_results["relation"] == rel_type, "f1"
                ].mean()
                al_score = al_results.loc[
                    (al_results["relation"] == rel_type)
                    & (al_results["strategy"] == strategy),
                    "f1 (max)",
                ].mean()
                # improvement in percentage points over the PL baseline
                improvement = (al_score - pl_score) * 100
                row.append(improvement)

            table.append(row)

        print("Method: ", METHODS_NAMES[method])
        print(tabulate(table, headers=TABLE_HEADERS, tablefmt="markdown"))
        print("\n\n")
|
|
375 |
|
|
|
376 |
|
|
|
377 |
def step_time_table():
    """Generates the results table for the AL step times.

    Collects per-iteration step times (min/avg/max) for both corpora, converts
    them from seconds to minutes and prints a LaTeX table with one row per
    (method, query strategy) pair.
    """

    ddi_data = collect_step_times(Path(pjoin("results", "ddi")))
    n2c2_data = collect_step_times(Path(pjoin("results", "n2c2", "all")))
    ddi_data["Corpus"] = "DDI"
    n2c2_data["Corpus"] = "n2c2"
    data = pd.concat([ddi_data, n2c2_data])

    # merge the two batch strategies under a single display label
    data["strategy"] = data["strategy"].apply(rename_strategy)

    # convert iteration times from seconds to minutes, rounded to 2 decimals
    # (vectorized instead of two element-wise .apply passes)
    for column in [
        "iter_time (average)",
        "iter_time (max)",
        "iter_time (min)",
    ]:
        data[column] = (data[column] / 60).round(2)

    # create table; the first table row acts as a second header line that
    # labels the min/avg/max sub-columns under each corpus
    HEADERS = ["Method", "Strategy", "n2c2", "n2c2", "n2c2", "DDI", "DDI", "DDI"]
    table = [["Method", "Strategy", "Min.", "Avg.", "Max.", "Min.", "Avg.", "Max."]]
    for method in METHODS_NAMES:
        for q_strategy in ["random", "LC", "BatchLC / BatchBALD"]:
            row = [
                METHODS_NAMES[method],
                q_strategy,
            ]
            for corpus in ["n2c2", "DDI"]:
                # the row mask does not depend on the column: compute it once
                index = (
                    (data["method"] == method)
                    & (data["strategy"] == q_strategy)
                    & (data["Corpus"] == corpus)
                )
                for column in [
                    "iter_time (min)",
                    "iter_time (average)",
                    "iter_time (max)",
                ]:
                    mean = data.loc[index, column].mean()
                    std = data.loc[index, column].std()
                    row.append(_fcell_ar(mean, std))
            table.append(row)

    print(tabulate(table, headers=HEADERS, tablefmt="latex"))
|
|
424 |
|
|
|
425 |
|
|
|
426 |
def step_time_sum_table():
    """Generates the results table for the total AL step time"""

    data = {
        "DDI": collect_step_times_sum(Path(pjoin("results", "ddi"))),
        "n2c2": collect_step_times_sum(Path(pjoin("results", "n2c2", "all"))),
    }

    # create table
    HEADERS = ["Method", "Strategy", "n2c2", "DDI"]
    table = []
    for method in METHODS_NAMES:
        # the random-forest method uses BatchLC; every other method BatchBALD
        batch_strategy = "BatchLC" if method == "rf" else "BatchBALD"
        for q_strategy in ("random", "LC", batch_strategy):
            row = [METHODS_NAMES[method], q_strategy]
            for corpus in ("n2c2", "DDI"):
                stats = data[corpus][method][q_strategy]
                row.append(_fcell_ar(stats["mean"], stats["std"]))
            table.append(row)

    print(tabulate(table, headers=HEADERS, tablefmt="latex"))
|
|
455 |
|
|
|
456 |
|
|
|
457 |
def ar_table():
    """Generates the results table for the annotation rates.

    Collects annotation rates for both corpora, converts them to percentages
    and prints a LaTeX table with the mean/std TAR and CAR per
    (method, query strategy, corpus).
    """
    ar_ddi = collect_annotation_rates(Path(pjoin("results", "ddi")))
    ar_n2c2 = collect_annotation_rates(Path(pjoin("results", "n2c2", "all")))
    ar_ddi["Corpus"] = "DDI"
    ar_n2c2["Corpus"] = "n2c2"
    ar_results = pd.concat([ar_ddi, ar_n2c2])

    # convert rates to percentages (vectorized instead of element-wise .apply)
    for rate in ["CAR", "TAR", "IAR"]:
        ar_results[rate] = ar_results[rate] * 100

    # table; the TAR/CAR column pair is repeated: first pair n2c2, second DDI
    HEADERS = [
        "Method",
        "Strategy",
        "TAR (%)",
        "CAR (%)",
        "TAR (%)",
        "CAR (%)",
    ]
    table = []

    for method in METHODS_NAMES:
        for q_strategy in ["random", "LC", "BatchLC / BatchBALD"]:
            row = [
                METHODS_NAMES[method],
                q_strategy,
            ]
            for corpus in ["n2c2", "DDI"]:
                # NOTE(review): unlike step_time_table, strategies are not
                # passed through rename_strategy here — if the collected data
                # stores the raw names ("BatchLC"/"BatchBALD"), this filter
                # matches no rows and yields NaN cells; confirm upstream.
                index = (
                    (ar_results["method"] == method)
                    & (ar_results["strategy"] == q_strategy)
                    & (ar_results["Corpus"] == corpus)
                )
                for metric in ["TAR", "CAR"]:
                    mean = ar_results.loc[index, metric].mean()
                    std = ar_results.loc[index, metric].std()
                    row.append(_fcell_ar(mean, std))
            table.append(row)

    print(tabulate(table, headers=HEADERS, tablefmt="latex"))
print(tabulate(table, headers=HEADERS, tablefmt="latex")) |