[79668b]: / tests / regression / test_regression.py

Download this file

104 lines (82 with data), 3.1 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
import json
from typing import Optional
from docdeid import Annotation, AnnotationSet
from deduce import Deduce
def regression_test(
    model: Deduce,
    examples_file: str,
    enabled: set[str],
    known_failures: Optional[set[int]] = None,
):
    """Check a model's annotations against the stored examples in a JSON file.

    The file is expected to hold a top-level ``"examples"`` list whose items
    each provide ``"id"``, ``"text"`` and ``"annotations"``. Every item of
    ``"annotations"`` is passed as keyword arguments to ``Annotation``.

    Args:
        model: Object whose ``deidentify(text=..., enabled=...)`` result
            exposes an ``annotations`` attribute.
        examples_file: Path to the JSON file with the examples.
        enabled: Passed through unchanged as ``enabled`` to ``deidentify``.
        known_failures: Ids of examples that are expected to mismatch.
            ``None`` (the default) means no example is expected to fail.

    Raises:
        AssertionError: If the set of mismatching example ids differs from
            ``known_failures``, in either direction: an unexpected mismatch,
            or a known failure that now matches.
    """
    if known_failures is None:
        known_failures = set()

    with open(examples_file, "rb") as file:
        examples = json.load(file)["examples"]

    failures = set()

    for example in examples:
        trues = AnnotationSet(
            Annotation(**annotation) for annotation in example["annotations"]
        )
        preds = model.deidentify(text=example["text"], enabled=enabled).annotations

        # Explicit comparison instead of assert/except, so mismatches are
        # still recorded when asserts are disabled. ``==`` is negated rather
        # than switching to ``!=`` so the same equality check decides the
        # outcome.
        if not (trues == preds):
            failures.add(example["id"])

    assert failures == known_failures, (
        f"{examples_file}: unexpected failures {failures - known_failures}, "
        f"known failures that now pass {known_failures - failures}"
    )
def annotators_from_group(model: Deduce, group: str) -> set[str]:
    """Return the names found in ``model.processors[group]`` plus ``group`` itself.

    Iterating ``model.processors[group]`` must yield two-item pairs; the
    first item of each pair is collected as a name.
    """
    # Seed with the group name so it is always part of the result.
    enabled = {group}
    for processor_name, _ in model.processors[group]:
        enabled.add(processor_name)
    return enabled
class TestRegression:
    """Regression tests: one stored example file per annotator group.

    Each test takes a ``model`` argument (presumably a pytest fixture defined
    outside this file; confirm in conftest) and runs ``regression_test`` with
    only the annotators of one group enabled.
    """

    @staticmethod
    def _run_group(model, examples_file, group):
        """Run ``regression_test`` on ``examples_file`` with ``group`` enabled."""
        regression_test(
            model=model,
            examples_file=examples_file,
            enabled=annotators_from_group(model, group),
        )

    def test_regression_name(self, model):
        """Stored examples for the ``names`` group."""
        self._run_group(model, "tests/data/regression_cases/names.json", "names")

    def test_regression_location(self, model):
        """Stored examples for the ``locations`` group."""
        self._run_group(
            model, "tests/data/regression_cases/locations.json", "locations"
        )

    def test_regression_institution(self, model):
        """Stored examples for the ``institutions`` group."""
        self._run_group(
            model, "tests/data/regression_cases/institutions.json", "institutions"
        )

    def test_regression_date(self, model):
        """Stored examples for the ``dates`` group."""
        self._run_group(model, "tests/data/regression_cases/dates.json", "dates")

    def test_regression_age(self, model):
        """Stored examples for the ``ages`` group."""
        self._run_group(model, "tests/data/regression_cases/ages.json", "ages")

    def test_regression_identifier(self, model):
        """Stored examples for the ``identifiers`` group."""
        self._run_group(
            model, "tests/data/regression_cases/identifiers.json", "identifiers"
        )

    def test_regression_phone(self, model):
        """Stored examples for the ``phone_numbers`` group."""
        self._run_group(
            model, "tests/data/regression_cases/phone_numbers.json", "phone_numbers"
        )

    def test_regression_email(self, model):
        """Stored examples for the ``email_addresses`` group."""
        self._run_group(
            model, "tests/data/regression_cases/emails.json", "email_addresses"
        )

    def test_regression_url(self, model):
        """Stored examples for the ``urls`` group."""
        self._run_group(model, "tests/data/regression_cases/urls.json", "urls")