[79668b]: / tests / unit / test_redactor.py

Download this file

82 lines (63 with data), 2.5 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import docdeid as dd
from deduce.redactor import DeduceRedactor
class TestDeduceRedactor:
    """Unit tests for ``DeduceRedactor.redact``.

    Each test builds a ``dd.AnnotationSet`` by hand, redacts a short Dutch
    sentence with it, and compares the result against a literal string.
    """

    def test_redact_patient(self):
        """Two separate ``patient`` annotations are each replaced by ``[PATIENT]``."""
        redactor = DeduceRedactor()
        source = "Jan Jansen"

        first_name = dd.Annotation(
            text="Jan", start_char=0, end_char=3, tag="patient"
        )
        surname = dd.Annotation(
            text="Jansen", start_char=4, end_char=10, tag="patient"
        )
        spans = dd.AnnotationSet([first_name, surname])

        assert redactor.redact(source, spans) == "[PATIENT] [PATIENT]"

    def test_redact_mixed(self):
        """A ``patient`` span and a ``woonplaats`` span are redacted together.

        The expected output has no counter on the patient placeholder and a
        ``-1`` counter on the woonplaats placeholder.
        """
        redactor = DeduceRedactor()
        source = "Jan Jansen, wonende in Rotterdam"

        person = dd.Annotation(
            text="Jan Jansen", start_char=0, end_char=10, tag="patient"
        )
        residence = dd.Annotation(
            text="Rotterdam", start_char=23, end_char=32, tag="woonplaats"
        )
        spans = dd.AnnotationSet([person, residence])

        assert (
            redactor.redact(source, spans) == "[PATIENT], wonende in [WOONPLAATS-1]"
        )

    def test_redact_count_multiple(self):
        """Two different ``woonplaats`` values are expected to be numbered 1 and 2.

        The name "Jan Jansen" is not annotated here, so it stays in the output.
        """
        redactor = DeduceRedactor()
        source = "Jan Jansen, wonende in Rotterdam, verhuisd vanuit Groningen"

        current_city = dd.Annotation(
            text="Rotterdam", start_char=23, end_char=32, tag="woonplaats"
        )
        previous_city = dd.Annotation(
            text="Groningen", start_char=50, end_char=59, tag="woonplaats"
        )
        spans = dd.AnnotationSet([current_city, previous_city])

        wanted = (
            "Jan Jansen, wonende in [WOONPLAATS-1], verhuisd vanuit [WOONPLAATS-2]"
        )
        assert redactor.redact(source, spans) == wanted

    def test_redact_count_multiple_fuzzy(self):
        """Near-identical ``woonplaats`` values are expected to share counter 1.

        "Ommen" and "Emmen" differ by a single character; going by the test
        name, the redactor presumably matches them fuzzily -- the mechanism
        is not visible from this file.
        """
        redactor = DeduceRedactor()
        source = "Jan Jansen, wonende in Ommen, verhuisd vanuit Emmen"

        current_city = dd.Annotation(
            text="Ommen", start_char=23, end_char=28, tag="woonplaats"
        )
        previous_city = dd.Annotation(
            text="Emmen", start_char=46, end_char=51, tag="woonplaats"
        )
        spans = dd.AnnotationSet([current_city, previous_city])

        wanted = (
            "Jan Jansen, wonende in [WOONPLAATS-1], verhuisd vanuit [WOONPLAATS-1]"
        )
        assert redactor.redact(source, spans) == wanted