deidentify/evaluation/significance_testing.py

import argparse
import csv
import os
from os.path import dirname, join
from typing import List

import yaml
from loguru import logger

from deidentify.base import Document
from deidentify.evaluation import evaluate_run, evaluator
from deidentify.evaluation.art import ApproximateRandomizationTest


def _load_yaml(yaml_file):
    with open(yaml_file, 'r') as stream:
        config = yaml.safe_load(stream)
    return config


# Micro-averaged, entity-level metrics used as test statistics for the
# approximate randomization test (ART).
def micro_f1(gold: List[Document], predicted: List[Document]):
    return evaluator.Evaluator(gold, predicted).entity_level().f_score()


def micro_precision(gold: List[Document], predicted: List[Document]):
    return evaluator.Evaluator(gold, predicted).entity_level().precision()


def micro_recall(gold: List[Document], predicted: List[Document]):
    return evaluator.Evaluator(gold, predicted).entity_level().recall()


class SignificanceReport:
    """Compares pairs of prediction runs with an approximate randomization test
    and writes the resulting p-values to a CSV file."""

    def __init__(self, title, corpus, part, runs, metrics, trials=10000):
        self.title = title
        self.corpus = corpus
        self.part = part
        self.runs = runs
        self.trials = trials
        self.metrics = metrics

        out_dir = join(dirname(__file__), '../../output/evaluation', corpus)
        self.out_file = join(out_dir, 'significance.csv')
        os.makedirs(out_dir, exist_ok=True)

    def _corpus_path(self):
        return join(dirname(__file__), '../../data/corpus', self.corpus, self.part)

    def _predictions_path(self, run_id):
        return join(dirname(__file__), '../../output/predictions', self.corpus, run_id, self.part)

    def art_test(self, gold, run_a, run_b, metric):
        # Approximate randomization test comparing two runs on the given metric.
        art = ApproximateRandomizationTest(gold, run_a, run_b, metric,
                                           trials=self.trials)
        return art.run()

    def make_report(self):
        logger.info('Generate significance report {}'.format(self.title))
        logger.info('Corpus = "{}" part = "{}"'.format(self.corpus, self.part))

        docs_path = self._corpus_path()
        gold_documents = evaluate_run.get_documents(docs_path=docs_path, anns_path=docs_path)

        with open(self.out_file, 'w') as file:
            writer = csv.writer(file)
            writer.writerow(['run_a', 'run_b', 'metric', 'p_value'])

            for pair in self.runs:
                # Load the predictions of both runs against the same gold documents.
                pred_docs_run_a = evaluate_run.get_documents(
                    docs_path=docs_path, anns_path=self._predictions_path(pair['run_a']))
                pred_docs_run_b = evaluate_run.get_documents(
                    docs_path=docs_path, anns_path=self._predictions_path(pair['run_b']))

                for metric in self.metrics:
                    p_value = self.art_test(gold_documents, pred_docs_run_a, pred_docs_run_b,
                                            metric)
                    writer.writerow([pair['run_a'], pair['run_b'], metric.__name__, p_value])
                    logger.info('{} - {} - {}'.format(pair, metric.__name__, p_value))


def arg_parser():
    parser = argparse.ArgumentParser()
    parser.add_argument("config_file", help="Significance test config file (.yaml)")
    parser.add_argument("--trials", help="Number of randomization trials",
                        default=10000, type=int)
    return parser.parse_args()


if __name__ == '__main__':
    ARGS = arg_parser()
    c = _load_yaml(ARGS.config_file)
    report = SignificanceReport(
        title=c['name'],
        corpus=c['corpus'],
        part=c['part'],
        runs=c['run_ids'],
        metrics=[
            micro_f1,
            micro_precision,
            micro_recall
        ],
        trials=ARGS.trials
    )
    report.make_report()
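
Usage note: the script is driven by a small YAML config. The keys read in __main__ are name, corpus, part and run_ids, where each run_ids entry names a run_a/run_b pair of prediction runs to compare. The sketch below is not part of the original module; it shows the equivalent Python dict that yaml.safe_load would return and drives SignificanceReport directly. The corpus name and run identifiers are hypothetical placeholders.

    # Minimal sketch, not part of the original module. The dict mirrors the
    # YAML structure read in __main__; corpus and run ids are placeholders.
    example_config = {
        'name': 'example-significance-report',
        'corpus': 'dummy',   # documents are read from data/corpus/<corpus>/<part>
        'part': 'test',
        'run_ids': [
            {'run_a': 'run_a_id', 'run_b': 'run_b_id'},
        ],
    }

    report = SignificanceReport(
        title=example_config['name'],
        corpus=example_config['corpus'],
        part=example_config['part'],
        runs=example_config['run_ids'],
        metrics=[micro_f1, micro_precision, micro_recall],
        trials=1000,  # fewer than the default 10000, for a quick check
    )
    report.make_report()  # writes output/evaluation/<corpus>/significance.csv

From the command line, the same run is obtained by passing a YAML file with those keys to the script, e.g. python significance_testing.py my_config.yaml --trials 10000 (my_config.yaml is a placeholder name).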