# utils/metrics.py
import pandas as pd

from .pycocoevalcap.bleu.bleu import Bleu
from .pycocoevalcap.meteor.meteor import Meteor
from .pycocoevalcap.rouge.rouge import Rouge
import re
import os

# Create the results directory if it does not already exist
os.makedirs('results', exist_ok=True)

def preprocess_captions(images_captions):
    """
    :param images_captions: dictionary with image ids as keys and captions as values
    :return: dictionary with the processed captions as values, each wrapped in a
             single-element list to match the MSCOCO format
    """

    # Clean in the BioASQ style: drop quotes and slashes, strip punctuation, lowercase.
    # The hyphen is escaped so it is matched literally instead of forming an
    # accidental character range (':' to '[') as in the unescaped version.
    bioclean = lambda t: re.sub(r'[.,?;*!%^&_+():\-\[\]{}]', '',
                                t.replace('"', '').replace('/', '')
                                 .replace('\\', '').replace("'", '')
                                 .strip().lower())
    pr_captions = {}
    # Apply bioclean to every caption
    for image in images_captions:
        # Save the caption in a list to match the MSCOCO format
        pr_captions[image] = [bioclean(images_captions[image])]

    return pr_captions
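
# Example (hypothetical input): preprocess_captions({'img1': 'A large, round opacity.'})
# would return {'img1': ['a large round opacity']}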
def compute_scores(gts: str, res: str, save_scores: bool = True):
    """
    Performs the MS COCO caption evaluation using the Python 3 implementation
    (https://github.com/salaniz/pycocoevalcap).

    :param gts: path to a '|'-separated CSV with the image ids and their gold captions
    :param res: path to a '|'-separated CSV with the image ids and their generated captions
    :param save_scores: if True, also write the scores to results/scores.csv
    :return: dictionary with the evaluation score (the mean of the scores of all
             the instances) for each measure
    """
    # Read the caption files and convert each pandas DataFrame to a dict
    gold_captions_df = pd.read_csv(gts, sep='|', names=['ID', 'caption'])
    pred_captions_df = pd.read_csv(res, sep='|', names=['ID', 'caption'])

    gold_captions = preprocess_captions(dict(zip(gold_captions_df.ID.to_list(),
                                                 gold_captions_df.caption.to_list())))
    pred_captions = preprocess_captions(dict(zip(pred_captions_df.ID.to_list(),
                                                 pred_captions_df.caption.to_list())))

    # Set up scorers
    scorers = [
        (Bleu(4), ["BLEU_1", "BLEU_2", "BLEU_3", "BLEU_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L")
    ]
    metrics_scores = {}
    # Compute the score for each metric
    for scorer, method in scorers:
        try:
            score, scores = scorer.compute_score(gold_captions, pred_captions, verbose=0)
        except TypeError:
            # Some scorers do not accept the verbose keyword
            score, scores = scorer.compute_score(gold_captions, pred_captions)
        if isinstance(method, list):
            # Bleu returns one score per n-gram order
            for sc, m in zip(score, method):
                metrics_scores[m] = [round(sc * 100, 1)]
        else:
            metrics_scores[method] = [round(score * 100, 1)]

    if save_scores:
        # metrics_scores maps each measure to a single-element list,
        # so it can be written directly as a one-row DataFrame
        scores_df = pd.DataFrame.from_dict(metrics_scores)
        scores_df.to_csv('results/scores.csv', sep='\t')

    return metrics_scores
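

if __name__ == '__main__':
    # Minimal usage sketch; run from the repository root as a module
    # (`python -m utils.metrics`) so the relative imports above resolve.
    # The CSV paths below are hypothetical: each file is expected to hold
    # one '<image id>|<caption>' row per image.
    print(compute_scores('data/gold_captions.csv', 'data/pred_captions.csv'))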