[cad161]: / tests / matchers / test_phrase.py

Download this file

65 lines (36 with data), 1.4 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import pytest
from edsnlp.matchers.phrase import EDSPhraseMatcher
def test_eds_phrase_matcher(doc, nlp):
    """Smoke-test adding, removing and running an ``EDSPhraseMatcher``.

    A key is registered and removed straight away, then a second key is
    added and the matcher is run both on the full ``doc`` fixture and on
    its first ten tokens. Each run must yield at least one match.
    """
    phrase_matcher = EDSPhraseMatcher(nlp.vocab, attr="TEXT")

    # Register a throwaway key and drop it again before adding the real one.
    throwaway_patterns = list(nlp.pipe(["test"]))
    phrase_matcher.add("test", throwaway_patterns)
    phrase_matcher.remove("test")

    patient_patterns = list(nlp.pipe(["patient"]))
    phrase_matcher.add("patient", patient_patterns)

    # Run on the whole document, explicitly asking for non-span output.
    whole_doc_hits = list(phrase_matcher(doc, as_spans=False))
    assert whole_doc_hits

    # Run on a slice of the document with the default output mode.
    # NOTE(review): presumably the `doc` fixture mentions "patient" within
    # its first ten tokens - confirm against the fixture definition.
    sliced_hits = list(phrase_matcher(doc[:10]))
    assert sliced_hits
def test_offset(blank_nlp):
    """Check that matches on a doc and on a slice of it cover the pattern text.

    The matcher is run on the full document and on ``doc[2:]``, in both
    tuple mode and ``as_spans=True`` mode. Every match must have the same
    text as the pattern.
    """
    text = "Ceci est un test de matching"

    doc = blank_nlp(text)
    pattern = blank_nlp("matching")

    matcher = EDSPhraseMatcher(blank_nlp.vocab, attr="TEXT")
    matcher.add("test", [pattern])

    # NOTE(review): each loop below passes vacuously if the matcher yields
    # nothing; only the content of the matches that are produced is checked.

    # Full document, (key, start, end) tuples.
    for _, start, end in matcher(doc):
        assert doc[start:end].text == pattern.text

    # Full document, spans. The comparison used to be a bare expression
    # whose result was discarded; it is now actually asserted.
    for span in matcher(doc, as_spans=True):
        assert span.text == pattern.text

    # Slice of the document; the returned indices are applied to `doc`.
    for _, start, end in matcher(doc[2:]):
        assert doc[start:end].text == pattern.text

    # Slice of the document, spans (previously also an un-asserted comparison).
    for span in matcher(doc[2:], as_spans=True):
        assert span.text == pattern.text
def test_remove(blank_nlp):
    """Check key counting and removal on an ``EDSPhraseMatcher``.

    Two patterns are added under the same key, so the matcher must report a
    length of one. Removing an unknown key must raise ``KeyError``, and
    removing the known key must bring the length back to zero.
    """
    first_pattern, second_pattern = (
        blank_nlp(phrase) for phrase in ("matching", "Ceci")
    )

    matcher = EDSPhraseMatcher(blank_nlp.vocab, attr="TEXT")

    # Both patterns go under the same key.
    for phrase_doc in (first_pattern, second_pattern):
        matcher.add("test", [phrase_doc])

    assert len(matcher) == 1

    # An unknown key must be rejected.
    with pytest.raises(KeyError):
        matcher.remove("wrong_key")

    # Removing the only registered key empties the matcher.
    matcher.remove("test")

    assert len(matcher) == 0