utils/metrics.py

import os
import re

import pandas as pd

from .pycocoevalcap.bleu.bleu import Bleu
from .pycocoevalcap.meteor import Meteor
from .pycocoevalcap.rouge import Rouge

# Create the directory for the scores file, if it does not already exist.
os.makedirs('results', exist_ok=True)


def preprocess_captions(images_captions):
    """
    :param images_captions: Dictionary with image ids as keys and captions as values
    :return: Dictionary with the same keys and the cleaned captions as values,
        each wrapped in a single-element list to match the MSCOCO format
    """
    # Clean for BioASQ: drop quote and slash characters, lowercase, and strip
    # punctuation. The hyphen is escaped so the character class removes a
    # literal '-' instead of matching the unintended range ':' to '['.
    bioclean = lambda t: re.sub(r'[.,?;*!%^&_+():\-\[\]{}]', '',
                                t.replace('"', '').replace('/', '').replace('\\', '')
                                 .replace("'", '').strip().lower())

    pr_captions = {}
    # Apply bioclean to every caption
    for image in images_captions:
        # Save the caption in a list to match the MSCOCO format
        pr_captions[image] = [bioclean(images_captions[image])]

    return pr_captions


def compute_scores(gts: str, res: str, save_scores: bool = True):
    """
    Performs the MS COCO caption evaluation using the Python 3 implementation
    (https://github.com/salaniz/pycocoevalcap).

    :param gts: Path to a '|'-separated file with the image ids and their gold captions
    :param res: Path to a '|'-separated file with the image ids and their generated captions
    :param save_scores: If True, the scores are also written to results/scores.csv
    :return: Dictionary with the evaluation score (the mean of the scores of all
        the instances) for each measure
    """
    # Load both files and convert each pd.DataFrame to a dict of image id -> caption
    gold_captions_df = pd.read_csv(gts, sep='|', names=['ID', 'caption'])
    pred_captions_df = pd.read_csv(res, sep='|', names=['ID', 'caption'])
    gold_captions = preprocess_captions(dict(zip(gold_captions_df.ID.to_list(),
                                                 gold_captions_df.caption.to_list())))
    pred_captions = preprocess_captions(dict(zip(pred_captions_df.ID.to_list(),
                                                 pred_captions_df.caption.to_list())))

    # Set up the scorers
    scorers = [
        (Bleu(4), ["BLEU_1", "BLEU_2", "BLEU_3", "BLEU_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L")
    ]

    metrics_scores = {}
    # Compute the score for each metric
    for scorer, method in scorers:
        try:
            score, scores = scorer.compute_score(gold_captions, pred_captions, verbose=0)
        except TypeError:
            # Some scorers do not accept a verbose argument
            score, scores = scorer.compute_score(gold_captions, pred_captions)
        if isinstance(method, list):
            # Bleu(4) returns one score per n-gram order
            for sc, m in zip(score, method):
                metrics_scores[m] = [round(sc * 100, 1)]
        else:
            metrics_scores[method] = [round(score * 100, 1)]

    if save_scores:
        scores_df = pd.DataFrame.from_dict(metrics_scores)
        scores_df.to_csv('results/scores.csv', sep='\t')

    return metrics_scores
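

# Minimal usage sketch (an illustration, not part of the original module). It
# assumes two '|'-separated caption files of the form "<image id>|<caption>";
# the file names below are hypothetical.
if __name__ == '__main__':
    import sys

    # e.g. python -m utils.metrics gold_captions.csv predicted_captions.csv
    gts_path, res_path = sys.argv[1], sys.argv[2]
    scores = compute_scores(gts_path, res_path, save_scores=True)
    for measure, value in scores.items():
        print(f'{measure}: {value[0]}')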