--- a +++ b/tests/matchers/test_phrase.py @@ -0,0 +1,64 @@ +import pytest + +from edsnlp.matchers.phrase import EDSPhraseMatcher + + +def test_eds_phrase_matcher(doc, nlp): + matcher = EDSPhraseMatcher(nlp.vocab, attr="TEXT") + + matcher.add("test", list(nlp.pipe(["test"]))) + matcher.remove("test") + + matcher.add("patient", list(nlp.pipe(["patient"]))) + + matches = matcher(doc, as_spans=False) + + assert list(matches) + + matches = matcher(doc[:10]) + + assert list(matches) + + +def test_offset(blank_nlp): + + text = "Ceci est un test de matching" + + doc = blank_nlp(text) + pattern = blank_nlp("matching") + + matcher = EDSPhraseMatcher(blank_nlp.vocab, attr="TEXT") + + matcher.add("test", [pattern]) + + for _, start, end in matcher(doc): + assert doc[start:end].text == pattern.text + + for span in matcher(doc, as_spans=True): + span.text == pattern.text + + for _, start, end in matcher(doc[2:]): + assert doc[start:end].text == pattern.text + + for span in matcher(doc[2:], as_spans=True): + span.text == pattern.text + + +def test_remove(blank_nlp): + + pattern = blank_nlp("matching") + pattern2 = blank_nlp("Ceci") + + matcher = EDSPhraseMatcher(blank_nlp.vocab, attr="TEXT") + + matcher.add("test", [pattern]) + matcher.add("test", [pattern2]) + + assert len(matcher) == 1 + + with pytest.raises(KeyError): + matcher.remove("wrong_key") + + matcher.remove("test") + + assert len(matcher) == 0