utils/metrics.py

import os
import re

import pandas as pd

from .pycocoevalcap.bleu.bleu import Bleu
from .pycocoevalcap.meteor import Meteor
from .pycocoevalcap.rouge import Rouge

# Create the directory for the scores file, if it does not already exist.
os.makedirs('results', exist_ok=True)


def preprocess_captions(images_captions):
    """
    :param images_captions: Dictionary with image ids as keys and captions as values
    :return: Dictionary with the same keys and the cleaned captions as values,
        each wrapped in a single-element list to match the MSCOCO format
    """
    # Clean for BioASQ: drop quote and slash characters, lowercase, and strip
    # punctuation. The hyphen is escaped so the character class removes a
    # literal '-' instead of matching the unintended range ':' to '['.
    bioclean = lambda t: re.sub(r'[.,?;*!%^&_+():\-\[\]{}]', '',
                                t.replace('"', '').replace('/', '').replace('\\', '')
                                 .replace("'", '').strip().lower())

    pr_captions = {}
    # Apply bioclean to every caption
    for image in images_captions:
        # Save the caption in a list to match the MSCOCO format
        pr_captions[image] = [bioclean(images_captions[image])]

    return pr_captions


def compute_scores(gts: str, res: str, save_scores: bool = True):
    """
    Performs the MS COCO caption evaluation using the Python 3 implementation
    (https://github.com/salaniz/pycocoevalcap).

    :param gts: Path to a '|'-separated file with the image ids and their gold captions
    :param res: Path to a '|'-separated file with the image ids and their generated captions
    :param save_scores: If True, the scores are also written to results/scores.csv
    :return: Dictionary with the evaluation score (the mean of the scores of all
        the instances) for each measure
    """
    # Load both files and convert each pd.DataFrame to a dict of image id -> caption
    gold_captions_df = pd.read_csv(gts, sep='|', names=['ID', 'caption'])
    pred_captions_df = pd.read_csv(res, sep='|', names=['ID', 'caption'])
    gold_captions = preprocess_captions(dict(zip(gold_captions_df.ID.to_list(),
                                                 gold_captions_df.caption.to_list())))
    pred_captions = preprocess_captions(dict(zip(pred_captions_df.ID.to_list(),
                                                 pred_captions_df.caption.to_list())))

    # Set up the scorers
    scorers = [
        (Bleu(4), ["BLEU_1", "BLEU_2", "BLEU_3", "BLEU_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L")
    ]

    metrics_scores = {}
    # Compute the score for each metric
    for scorer, method in scorers:
        try:
            score, scores = scorer.compute_score(gold_captions, pred_captions, verbose=0)
        except TypeError:
            # Some scorers do not accept a verbose argument
            score, scores = scorer.compute_score(gold_captions, pred_captions)
        if isinstance(method, list):
            # Bleu(4) returns one score per n-gram order
            for sc, m in zip(score, method):
                metrics_scores[m] = [round(sc * 100, 1)]
        else:
            metrics_scores[method] = [round(score * 100, 1)]

    if save_scores:
        scores_df = pd.DataFrame.from_dict(metrics_scores)
        scores_df.to_csv('results/scores.csv', sep='\t')

    return metrics_scores
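

# Minimal usage sketch (an illustration, not part of the original module). It
# assumes two '|'-separated caption files of the form "<image id>|<caption>";
# the file names below are hypothetical.
if __name__ == '__main__':
    import sys

    # e.g. python -m utils.metrics gold_captions.csv predicted_captions.csv
    gts_path, res_path = sys.argv[1], sys.argv[2]
    scores = compute_scores(gts_path, res_path, save_scores=True)
    for measure, value in scores.items():
        print(f'{measure}: {value[0]}')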