Switch to unified view

a b/tests/unit/test_redactor.py
1
import docdeid as dd
2
3
from deduce.redactor import DeduceRedactor
4
5
6
class TestDeduceRedactor:
    """Unit tests for ``DeduceRedactor.redact``.

    Every test hand-builds a ``dd.AnnotationSet`` over a short Dutch sentence
    and compares the redacted output with a literal expected string.
    """

    def test_redact_patient(self):
        """Each ``patient`` annotation is replaced by an unnumbered ``[PATIENT]``."""
        proc = DeduceRedactor()
        text = "Jan Jansen"

        # First name and surname are annotated separately; the space between
        # them is not annotated and must survive in the output.
        annotations = dd.AnnotationSet(
            [
                dd.Annotation(text="Jan", start_char=0, end_char=3, tag="patient"),
                dd.Annotation(text="Jansen", start_char=4, end_char=10, tag="patient"),
            ]
        )

        expected_text = "[PATIENT] [PATIENT]"

        assert proc.redact(text, annotations) == expected_text

    def test_redact_mixed(self):
        """A ``patient`` and a ``woonplaats`` annotation in one text are both redacted.

        The patient placeholder carries no counter, while the ``woonplaats``
        placeholder is numbered (``[WOONPLAATS-1]``).
        """
        proc = DeduceRedactor()
        text = "Jan Jansen, wonende in Rotterdam"

        annotations = dd.AnnotationSet(
            [
                dd.Annotation(
                    text="Jan Jansen", start_char=0, end_char=10, tag="patient"
                ),
                dd.Annotation(
                    text="Rotterdam", start_char=23, end_char=32, tag="woonplaats"
                ),
            ]
        )

        expected_text = "[PATIENT], wonende in [WOONPLAATS-1]"

        assert proc.redact(text, annotations) == expected_text

    def test_redact_count_multiple(self):
        """Two different ``woonplaats`` values get distinct counters (1 and 2).

        "Jan Jansen" is deliberately left unannotated here, so it must appear
        unchanged in the output.
        """
        proc = DeduceRedactor()
        text = "Jan Jansen, wonende in Rotterdam, verhuisd vanuit Groningen"

        annotations = dd.AnnotationSet(
            [
                dd.Annotation(
                    text="Rotterdam", start_char=23, end_char=32, tag="woonplaats"
                ),
                dd.Annotation(
                    text="Groningen", start_char=50, end_char=59, tag="woonplaats"
                ),
            ]
        )

        expected_text = (
            "Jan Jansen, wonende in [WOONPLAATS-1], verhuisd vanuit [WOONPLAATS-2]"
        )

        assert proc.redact(text, annotations) == expected_text

    def test_redact_count_multiple_fuzzy(self):
        """Two near-identical ``woonplaats`` values share the same counter.

        "Ommen" and "Emmen" differ by a single character and are both expected
        to become ``[WOONPLAATS-1]``.
        NOTE(review): presumably the redactor groups similar spellings as one
        entity; confirm the matching rule against ``DeduceRedactor``.
        """
        proc = DeduceRedactor()
        text = "Jan Jansen, wonende in Ommen, verhuisd vanuit Emmen"

        annotations = dd.AnnotationSet(
            [
                dd.Annotation(
                    text="Ommen", start_char=23, end_char=28, tag="woonplaats"
                ),
                dd.Annotation(
                    text="Emmen", start_char=46, end_char=51, tag="woonplaats"
                ),
            ]
        )

        expected_text = (
            "Jan Jansen, wonende in [WOONPLAATS-1], verhuisd vanuit [WOONPLAATS-1]"
        )

        assert proc.redact(text, annotations) == expected_text