tests/evaluation/test_evaluate.py

from deidentify.base import Annotation, Document
from deidentify.evaluation.evaluator import ENTITY_TAG, Evaluator


def test_entity_level():
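    # Entity-level scoring counts a prediction as a true positive only if the
    # character span and the tag both match exactly. The predicted MISC span in
    # doc_a starts one character early (2 vs. 3), so it counts as both a false
    # positive and a false negative; the PER span in doc_b matches exactly.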
    gold = [
        Document(name='doc_a', text='', annotations=[Annotation('', 3, 6, 'MISC')]),
        Document(name='doc_b', text='', annotations=[Annotation('', 0, 2, 'PER')])
    ]

    predicted = [
        Document(name='doc_a', text='', annotations=[Annotation('', 2, 6, 'MISC')]),
        Document(name='doc_b', text='', annotations=[Annotation('', 0, 2, 'PER')])
    ]

    evaluator = Evaluator(gold, predicted)
    scores = evaluator.entity_level()
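    # One true positive out of two gold and two predicted entities:
    # micro P = R = F1 = 0.5; macro F1 = (1 + 0) / 2 = 0.5.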
    assert scores.micro_avg_f_score() == 0.5
    assert scores.macro_avg_f_score() == 0.5
    assert scores.f_score('PER') == 1
    assert scores.f_score('MISC') == 0


def test_token_annotations():
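    # token_annotations() labels each token of the document text with the tag
    # of the annotation covering it, or 'O' if it is uncovered. With
    # tag_blind=True, all entity tags collapse into the generic ENT tag.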
    evaluator = Evaluator(gold=(), predicted=())
    doc = Document(name='doc_a', text='A B C D.', annotations=[
        Annotation('B C', 2, 5, 'PER'),
        Annotation('D.', 6, 8, 'ORG')
    ])

    assert evaluator.token_annotations(doc) == ['O', 'PER', 'PER', 'ORG']
    assert evaluator.token_annotations(doc, tag_blind=True) == ['O', 'ENT', 'ENT', 'ENT']


def test_token_level():
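    # Per-token labels over the four tokens A, B, C and D.:
    #   doc_a  gold: O PER PER O    predicted: O PER PER O
    #   doc_b  gold: ORG PER O O    predicted: ORG ORG O O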
    text = 'A B C D.'

    gold_a = [Annotation('B C', 2, 5, 'PER')]
    gold_b = [Annotation('A', 0, 1, 'ORG'), Annotation('B', 2, 3, 'PER')]

    pred_a = [Annotation('B', 2, 3, 'PER'), Annotation('C', 4, 5, 'PER')]
    pred_b = [Annotation('A', 0, 1, 'ORG'), Annotation('B', 2, 3, 'ORG')]

    gold = [
        Document(name='doc_a', text=text, annotations=gold_a),
        Document(name='doc_b', text=text, annotations=gold_b)
    ]

    predicted = [
        Document(name='doc_a', text=text, annotations=pred_a),
        Document(name='doc_b', text=text, annotations=pred_b)
    ]

    evaluator = Evaluator(gold, predicted)
    scores = evaluator.token_level()
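    # PER: both predicted PER tokens are correct, but only 2 of the 3 gold PER
    # tokens are found: P = 1, R = 2/3 (rounded to 0.6667), F1 = 0.8.
    # ORG: the one gold ORG token is found, but B in doc_b is a false
    # positive: P = 0.5, R = 1, F1 = 2/3 (rounded to 0.6667).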
    assert scores.precision('PER') == 1
    assert scores.recall('PER') == 0.6667
    assert scores.f_score('PER') == 0.8

    assert scores.precision('ORG') == 0.5
    assert scores.recall('ORG') == 1
    assert scores.f_score('ORG') == 0.6667


def test_token_level_blind():
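    # token_level_blind() ignores the entity type: gold has three entity
    # tokens (B and C in doc_a, A in doc_b) and the predictions recover two
    # of them with no false positives, so P = 1, R = 2/3, F1 = 0.8 for the
    # collapsed ENTITY_TAG class.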
    gold_a = [Annotation('B C', 2, 5, 'PER')]
    gold_b = [Annotation('A', 0, 1, 'ORG')]

    pred_a = [Annotation('B', 2, 3, 'PER'), Annotation('C', 4, 5, 'PER')]
    pred_b = []

    gold = [
        Document(name='doc_a', text='A B C D.', annotations=gold_a),
        Document(name='doc_b', text='A B C D.', annotations=gold_b)
    ]

    predicted = [
        Document(name='doc_a', text='A B C D.', annotations=pred_a),
        Document(name='doc_b', text='A B C D.', annotations=pred_b)
    ]

    evaluator = Evaluator(gold, predicted)
    scores = evaluator.token_level_blind()
    assert scores.precision(ENTITY_TAG) == 1
    assert scores.recall(ENTITY_TAG) == 0.6667
    assert scores.f_score(ENTITY_TAG) == 0.8