|
a |
|
b/tests/matchers/test_phrase.py |
|
|
1 |
import pytest |
|
|
2 |
|
|
|
3 |
from edsnlp.matchers.phrase import EDSPhraseMatcher |
|
|
4 |
|
|
|
5 |
|
|
|
6 |
def test_eds_phrase_matcher(doc, nlp):
    """Exercise add / remove / call on an ``EDSPhraseMatcher`` keyed on ``TEXT``.

    A throwaway key is registered and removed first, then the "patient"
    pattern is registered and the matcher is run both on the full ``doc``
    fixture and on its first ten tokens. Each run must yield at least one
    match.
    """
    phrase_matcher = EDSPhraseMatcher(nlp.vocab, attr="TEXT")

    # Register a key and drop it again before adding the real pattern.
    throwaway_patterns = list(nlp.pipe(["test"]))
    phrase_matcher.add("test", throwaway_patterns)
    phrase_matcher.remove("test")

    patient_patterns = list(nlp.pipe(["patient"]))
    phrase_matcher.add("patient", patient_patterns)

    # Whole document, explicitly requesting the non-span output.
    doc_matches = list(phrase_matcher(doc, as_spans=False))
    assert doc_matches

    # A slice of the document, with the default output mode.
    slice_matches = list(phrase_matcher(doc[:10]))
    assert slice_matches
|
|
21 |
|
|
|
22 |
|
|
|
23 |
def test_offset(blank_nlp):
    """Check that matches point at the pattern text, on a doc and on a slice.

    The matcher is run on the whole document and on ``doc[2:]``, in both
    output modes: ``(key, start, end)`` tuples and ``as_spans=True``. In the
    tuple mode the returned indices are applied to the full ``doc``, including
    when the matcher was given the slice.
    """
    text = "Ceci est un test de matching"

    doc = blank_nlp(text)
    pattern = blank_nlp("matching")

    matcher = EDSPhraseMatcher(blank_nlp.vocab, attr="TEXT")

    matcher.add("test", [pattern])

    # Whole document.
    for _, start, end in matcher(doc):
        assert doc[start:end].text == pattern.text

    for span in matcher(doc, as_spans=True):
        # The comparison used to be a bare expression and was never checked.
        assert span.text == pattern.text

    # Slice that does not start at token 0; indices are still applied to `doc`.
    for _, start, end in matcher(doc[2:]):
        assert doc[start:end].text == pattern.text

    for span in matcher(doc[2:], as_spans=True):
        # Same fix as above: actually assert the comparison.
        assert span.text == pattern.text
|
|
45 |
|
|
|
46 |
|
|
|
47 |
def test_remove(blank_nlp):
    """Check key counting and removal on an ``EDSPhraseMatcher``.

    Two patterns are added under the same key; the test asserts the matcher
    then reports a length of 1. Removing an unknown key must raise
    ``KeyError``, and removing the real key must bring the length to 0.
    """
    first_pattern, second_pattern = (
        blank_nlp(phrase) for phrase in ("matching", "Ceci")
    )

    phrase_matcher = EDSPhraseMatcher(blank_nlp.vocab, attr="TEXT")

    # Both additions target the same key.
    for single_pattern in (first_pattern, second_pattern):
        phrase_matcher.add("test", [single_pattern])

    assert len(phrase_matcher) == 1

    with pytest.raises(KeyError):
        phrase_matcher.remove("wrong_key")

    phrase_matcher.remove("test")

    assert len(phrase_matcher) == 0