"""Tests for the span getter helpers of ``edsnlp.utils.span_getters``.

Module path: tests/utils/test_span_getters.py
"""
import pytest
from confit import validate_arguments

import edsnlp
import edsnlp.pipes as eds
from edsnlp.utils.span_getters import (
    ContextWindow,
    get_spans,
    make_span_context_getter,
    validate_span_setter,
)


def test_span_context_getter(lang):
    """Check context windows built from single word / sentence counts.

    Each case builds a getter with ``make_span_context_getter`` and compares
    the text of the window returned for every entity of the document.
    """
    pipeline = edsnlp.blank(lang)
    pipeline.add_pipe("eds.normalizer")
    pipeline.add_pipe("eds.sentences")
    pipeline.add_pipe("eds.matcher", config={"terms": {"sentence": "sentence"}})
    doc = pipeline(
        "This is a sentence. "
        "This is another sentence. "
        "This is a third one. "
        "Last sentence."
    )

    # (context_words, context_sents, expected window text per entity)
    cases = [
        (
            2,
            1,
            [
                "This is a sentence. This",
                "This is another sentence. This",
                ". Last sentence.",
            ],
        ),
        (
            0,
            1,
            [
                "This is a sentence.",
                "This is another sentence.",
                "Last sentence.",
            ],
        ),
        (
            0,
            2,
            [
                "This is a sentence. This is another sentence.",
                "This is a sentence. This is another sentence. This is a third one.",
                "This is a third one. Last sentence.",
            ],
        ),
    ]

    for n_words, n_sents, expected in cases:
        getter = make_span_context_getter(
            context_words=n_words,
            context_sents=n_sents,
        )
        windows = [getter(ent).text for ent in doc.ents]
        assert windows == expected


def test_span_getter_on_span():
    """Check that ``get_spans`` applied to a span only yields spans inside it."""
    pipeline = edsnlp.blank("eds")
    pipeline.add_pipe(eds.sentences())
    matcher = eds.matcher(
        terms={"animal": ["snake", "dog"]},
        span_setter=["ents", "animals"],
    )
    pipeline.add_pipe(matcher)
    doc = pipeline(
        "There was a snake. "
        "His friend was a dog. "
        "He liked baking cakes. "
        "But since he had no hands, he was a bad baker. "
    )

    def rendered(container, group):
        # String form of the spans found in `container` for the span group name.
        return str(list(get_spans(container, validate_span_setter(group))))

    first_sentence = list(doc.sents)[0]
    tail = doc[5:]

    assert rendered(first_sentence, "ents") == "[snake]"
    assert rendered(first_sentence, "animals") == "[snake]"
    assert rendered(tail, "animals") == "[dog]"
    assert rendered(tail, "*") == "[dog]"


def test_span_context_getter_asymmetric(lang):
    """Check context windows built from ``(before, after)`` pairs.

    Integer and tuple values of ``context_words`` / ``context_sents`` are
    both exercised, alone and combined.
    """
    pipeline = edsnlp.blank(lang)
    pipeline.add_pipe("eds.normalizer")
    pipeline.add_pipe("eds.sentences")
    pipeline.add_pipe("eds.matcher", config={"terms": {"animal": "kangaroo"}})
    doc = pipeline(
        "This is a sentence. "
        "This is another sentence with a kangaroo. "
        "This is a third one. "
        "Last sentence."
    )

    # (context_words, context_sents, expected window text per entity)
    cases = [
        (2, 0, ["with a kangaroo. This"]),
        ((2, 1), 0, ["with a kangaroo."]),
        ((1, 2), 0, ["a kangaroo. This"]),
        (
            0,
            (1, 2),
            ["This is another sentence with a kangaroo. This is a third one."],
        ),
        (
            0,
            (2, 2),
            [
                "This is a sentence. This is another sentence with a kangaroo. This is a third one."  # noqa: E501
            ],
        ),
        (0, (1, 1), ["This is another sentence with a kangaroo."]),
        (
            (1000, 0),
            0,
            ["This is a sentence. This is another sentence with a kangaroo"],
        ),
        (
            (1000, 0),
            (1, 2),
            [
                "This is a sentence. This is another sentence with a kangaroo. This is a third one."  # noqa: E501
            ],
        ),
    ]

    for n_words, n_sents, expected in cases:
        getter = make_span_context_getter(
            context_words=n_words,
            context_sents=n_sents,
        )
        windows = [getter(ent).text for ent in doc.ents]
        assert windows == expected


def test_context_getter_syntax():
    """Check that string expressions are validated into ``ContextWindow`` objects."""

    @validate_arguments
    def get_snippet(span, context: ContextWindow):
        return context(span)

    pipeline = edsnlp.blank("eds")
    pipeline.add_pipe("eds.normalizer")
    pipeline.add_pipe("eds.sentences")
    pipeline.add_pipe("eds.matcher", config={"terms": {"dog": "dog"}})
    doc = pipeline(
        "There was a snake. "
        "His friend was a dog. "
        "He liked baking cakes. "
        "But since he had no hands, he was a bad baker. "
    )

    # (context expression, expected text of the window around the first entity)
    cases = [
        ("words[-5:5]", ". His friend was a dog. He liked baking cakes"),
        ("words[-5:5] & sent", "His friend was a dog"),
        (
            "words[-5:8] | sents[-1:1]",
            "There was a snake. His friend was a dog. He liked baking cakes. "
            "But since",
        ),
    ]

    for expression, expected in cases:
        snippet = get_snippet(doc.ents[0], expression)
        assert snippet.text == expected


def test_invalid_context_getter_syntax():
    """Check that an unknown context expression is rejected with ``ValueError``."""

    @validate_arguments
    def apply_context(context: ContextWindow):
        pass

    # This expression must validate without raising.
    apply_context("sents[-2:2]")

    with pytest.raises(ValueError):
        apply_context("stuff[-2:2]")