[735bb5]: / src / evaluation / plots / boxplots.py

Download this file

152 lines (127 with data), 4.3 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# Base Dependencies
# -----------------
import numpy as np
from typing import Optional, Dict, List
from pathlib import Path
from joblib import load
from os.path import join as pjoin
# Package Dependencies
# --------------------
from evaluation.io import (
collect_pl_results_ddi,
collect_pl_results_n2c2,
collect_step_times,
)
# 3rd-Party Dependencies
# ----------------------
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
# Constants
# ---------
from constants import N2C2_REL_TYPES, METHODS_NAMES
COLOR_PALETTE = "Set2"
# Auxiliar Functions
# ------------------
def __boxplot_pl(
results: pd.DataFrame,
metric_name: str = "Micro_f1",
title: Optional[str] = None,
output_file: Optional[Path] = None,
):
"""Boxplot of the results of the experiments.
Args:
results (pd.DataFrame): results of the experiments
title (Optional[str], optional): title of the plot. Defaults to None.
output_file (Optional[Path], optional): path of the output file. Defaults to None.
"""
sns.set()
sns.set_style("white") # grid style
colors = sns.color_palette(COLOR_PALETTE) # set color palette
sns.boxplot(x="method", y=metric_name, data=results, palette=colors, orient="v")
plt.xlabel("Method")
plt.ylabel("Micro F1 Score")
plt.xticks(
ticks=list(range(len(METHODS_NAMES))),
labels=list(METHODS_NAMES.values()),
rotation=45,
ha="right",
)
if title:
plt.title(title)
if output_file:
plt.savefig(output_file, bbox_inches="tight")
else:
plt.show()
plt.clf()
# Main Function
# -------------
def pl_boxplot_both_corpus():
"""Plots a box plot showing the passive learning results for both corpora"""
title = None # "Passive Learning Results"
output_file = Path(pjoin("results", "pl_boxplot_both_corpus.png"))
n2c2_folder = Path(pjoin("results", "n2c2", "all"))
ddi_folder = Path(pjoin("results", "ddi"))
# collect results
n2c2_pl_results: pd.DataFrame = collect_pl_results_n2c2(n2c2_folder)
ddi_pl_results: pd.DataFrame = collect_pl_results_ddi(ddi_folder)
# combine results into single dataframe
n2c2_pl_results["corpus"] = "n2c2"
n2c2_pl_results = n2c2_pl_results[n2c2_pl_results["relation"] == "Micro"]
n2c2_pl_results = n2c2_pl_results[["f1", "method"]]
n2c2_pl_results = n2c2_pl_results.rename(columns={"f1": "Micro_f1"})
n2c2_pl_results["corpus"] = "n2c2"
ddi_pl_results["corpus"] = "DDI"
results = pd.concat([n2c2_pl_results, ddi_pl_results])
# plot
sns.set()
sns.set_style("whitegrid") # grid style
# colors = sns.color_palette(COLOR_PALETTE) # set color palette
colors = ["#a1dab4", "#FEFEBB"]
plt.figure(figsize=(6, 8), dpi=1200)
ax = sns.boxplot(
x="method", y="Micro_f1", data=results, palette=colors, orient="v", hue="corpus"
)
sns.move_legend(
ax,
"lower center",
bbox_to_anchor=(0.5, 1),
ncol=2,
title=None,
frameon=True,
)
plt.xlabel("Method")
plt.ylabel("Micro F1 Score")
plt.xticks(
ticks=list(range(len(METHODS_NAMES))),
labels=["RF", "BiLSTM", "CBERT", "CBERT-pairs"],
rotation=0,
ha="right",
)
if title:
plt.title(title)
if output_file:
plt.savefig(output_file, bbox_inches="tight")
else:
plt.show()
plt.clf()
def pl_boxplot_n2c2():
"""Plots a box plot showing the passive learning results for the N2C2 corpus"""
folder = Path(pjoin("results", "n2c2", "all"))
pl_results = collect_pl_results_n2c2(folder)
__boxplot_pl(
pl_results,
title="Corpus: n2c2",
metric_name="Micro_f1",
output_file=Path(pjoin("results", "n2c2", "all", "pl_boxplot_n2c2.png")),
)
def pl_boxplot_ddi():
"""Plots a box plot showing the passive learning results for the DDI Extraction corpus"""
folder = Path(pjoin("results", "ddi"))
pl_results = collect_pl_results_ddi(folder)
__boxplot_pl(
pl_results,
title="Corpus: DDI",
metric_name="Micro_f1",
output_file=Path(pjoin("results", "ddi", "pl_boxplot_ddi.png")),
)