b/utils/metrics.py

import pandas as pd

from .pycocoevalcap.bleu.bleu import Bleu
from .pycocoevalcap.meteor import Meteor
from .pycocoevalcap.rouge import Rouge
import re

import errno
import os

try:
    os.mkdir('results')
except OSError as exc:
    if exc.errno != errno.EEXIST:
        raise


def preprocess_captions(images_captions):
    """
    :param images_captions: Dictionary with image ids as keys and captions as values
    :return: Dictionary with the processed captions as values
    """

    # Clean for BioASQ
    bioclean = lambda t: re.sub(r'[.,?;*!%^&_+():-\[\]{}]', '',
                                t.replace('"', '').replace('/', '').replace('\\', '').replace("'", '')
                                .strip().lower())
    pr_captions = {}
    # Apply bio clean to data
    for image in images_captions:
        # Save caption to an array to match MSCOCO format
        pr_captions[image] = [bioclean(images_captions[image])]

    return pr_captions
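
# For illustration, the cleaning above turns a caption such as the made-up string
# 'There is a Nodule; in the left lung.' into 'there is a nodule in the left lung',
# wrapped in a single-element list to match the MSCOCO format, e.g.
#
#   preprocess_captions({'img1': 'There is a Nodule; in the left lung.'})
#   # -> {'img1': ['there is a nodule in the left lung']}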
|
|

def compute_scores(gts: str, res: str, save_scores: bool = True):
    """
    Performs the MS COCO evaluation using the Python 3 implementation (https://github.com/salaniz/pycocoevalcap)

    :param gts: Path to a '|'-separated file with the image ids and their gold captions
    :param res: Path to a '|'-separated file with the image ids and their generated captions
    :param save_scores: Whether to also write the scores to results/scores.csv
    :return: Dictionary with the evaluation score (the mean of the scores of all the instances) for each measure
    """
    # Read the '|'-separated caption files and convert them to dictionaries
    gold_captions_df = pd.read_csv(gts, sep='|', names=['ID', 'caption'])
    pred_captions_df = pd.read_csv(res, sep='|', names=['ID', 'caption'])

    gold_captions = preprocess_captions(dict(zip(gold_captions_df.ID.to_list(), gold_captions_df.caption.to_list())))
    pred_captions = preprocess_captions(dict(zip(pred_captions_df.ID.to_list(), pred_captions_df.caption.to_list())))

    # Set up scorers
    scorers = [
        (Bleu(4), ["BLEU_1", "BLEU_2", "BLEU_3", "BLEU_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L")
    ]
    metrics_scores = {}
    # Compute score for each metric
    for scorer, method in scorers:
        try:
            # Some scorers accept a verbose flag; fall back for those that do not
            score, scores = scorer.compute_score(gold_captions, pred_captions, verbose=0)
        except TypeError:
            score, scores = scorer.compute_score(gold_captions, pred_captions)
        if isinstance(method, list):
            for sc, m in zip(score, method):
                metrics_scores[m] = [round(sc * 100, 1)]
        else:
            metrics_scores[method] = [round(score * 100, 1)]

    if save_scores:
        scores_df = pd.DataFrame.from_dict(metrics_scores)
        scores_df.to_csv('results/scores.csv', sep='\t')

    return metrics_scores
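

# Example usage (hypothetical file names; each file is expected to hold one
# "image_id|caption" pair per line and is read without a header row):
#
#   scores = compute_scores('gold_captions.txt', 'generated_captions.txt')
#   print(scores)
#   # e.g. {'BLEU_1': [...], 'BLEU_2': [...], 'BLEU_3': [...], 'BLEU_4': [...],
#   #       'METEOR': [...], 'ROUGE_L': [...]}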