a b/tests/utils/test_span_getters.py
1
import pytest
2
from confit import validate_arguments
3
4
import edsnlp
5
import edsnlp.pipes as eds
6
from edsnlp.utils.span_getters import (
7
    ContextWindow,
8
    get_spans,
9
    make_span_context_getter,
10
    validate_span_setter,
11
)
12
13
14
def test_span_context_getter(lang):
    """Check context getters built from plain integer window sizes.

    A pipeline (normalizer, sentences, matcher on the term "sentence") is run
    on a four-sentence text; for several ``(context_words, context_sents)``
    settings, the text returned by the getter for every entity in
    ``doc.ents`` is compared with the expected snippets.
    """
    pipeline = edsnlp.blank(lang)
    for pipe_name in ("eds.normalizer", "eds.sentences"):
        pipeline.add_pipe(pipe_name)
    pipeline.add_pipe("eds.matcher", config={"terms": {"sentence": "sentence"}})

    text = (
        "This is a sentence. "
        "This is another sentence. "
        "This is a third one. "
        "Last sentence."
    )
    doc = pipeline(text)

    # Keys are (context_words, context_sents); values are the expected
    # snippet texts, one per matched entity, in document order.
    expected_by_window = {
        (2, 1): [
            "This is a sentence. This",
            "This is another sentence. This",
            ". Last sentence.",
        ],
        (0, 1): [
            "This is a sentence.",
            "This is another sentence.",
            "Last sentence.",
        ],
        (0, 2): [
            "This is a sentence. This is another sentence.",
            "This is a sentence. This is another sentence. This is a third one.",
            "This is a third one. Last sentence.",
        ],
    }

    for (n_words, n_sents), expected in expected_by_window.items():
        getter = make_span_context_getter(
            context_words=n_words,
            context_sents=n_sents,
        )
        snippets = [getter(ent).text for ent in doc.ents]
        assert snippets == expected
55
56
57
def test_span_getter_on_span():
    """Check that ``get_spans`` can be applied to a span, not only a doc.

    The matcher writes its "snake"/"dog" matches to both ``ents`` and the
    ``animals`` span group; spans are then collected from the first sentence
    and from the slice ``doc[5:]``.
    """
    pipeline = edsnlp.blank("eds")
    pipeline.add_pipe(eds.sentences())
    animal_matcher = eds.matcher(
        terms={"animal": ["snake", "dog"]},
        span_setter=["ents", "animals"],
    )
    pipeline.add_pipe(animal_matcher)

    text = (
        "There was a snake. "
        "His friend was a dog. "
        "He liked baking cakes. "
        "But since he had no hands, he was a bad baker. "
    )
    doc = pipeline(text)
    first_sentence = list(doc.sents)[0]

    def rendered_spans(container, key):
        # NOTE(review): the getter is built with `validate_span_setter`;
        # presumably getter and setter specs share a format — confirm.
        getter = validate_span_setter(key)
        return str(list(get_spans(container, getter)))

    assert rendered_spans(first_sentence, "ents") == "[snake]"
    assert rendered_spans(first_sentence, "animals") == "[snake]"
    assert rendered_spans(doc[5:], "animals") == "[dog]"
    assert rendered_spans(doc[5:], "*") == "[dog]"
77
78
79
def test_span_context_getter_asymmetric(lang):
    """Check context getters whose windows are given as 2-tuples.

    A single "kangaroo" entity is matched in a four-sentence text. Each
    scenario builds a getter from ``context_words`` / ``context_sents``
    (an int or a pair, apparently (before, after) — confirm against the
    ``make_span_context_getter`` documentation) and compares the returned
    text with the expected snippet.
    """
    pipeline = edsnlp.blank(lang)
    pipeline.add_pipe("eds.normalizer")
    pipeline.add_pipe("eds.sentences")
    pipeline.add_pipe("eds.matcher", config={"terms": {"animal": "kangaroo"}})

    text = (
        "This is a sentence. "
        "This is another sentence with a kangaroo. "
        "This is a third one. "
        "Last sentence."
    )
    doc = pipeline(text)

    # Expected snippet shared by two scenarios below.
    first_three = "This is a sentence. This is another sentence with a kangaroo. This is a third one."  # noqa: E501

    # Each row: (context_words, context_sents, expected texts for doc.ents).
    scenarios = [
        (2, 0, ["with a kangaroo. This"]),
        ((2, 1), 0, ["with a kangaroo."]),
        ((1, 2), 0, ["a kangaroo. This"]),
        (
            0,
            (1, 2),
            ["This is another sentence with a kangaroo. This is a third one."],
        ),
        (0, (2, 2), [first_three]),
        (0, (1, 1), ["This is another sentence with a kangaroo."]),
        (
            (1000, 0),
            0,
            ["This is a sentence. This is another sentence with a kangaroo"],
        ),
        ((1000, 0), (1, 2), [first_three]),
    ]

    for words_window, sents_window, expected in scenarios:
        getter = make_span_context_getter(
            context_words=words_window,
            context_sents=sents_window,
        )
        snippets = [getter(ent).text for ent in doc.ents]
        assert snippets == expected
132
133
134
def test_context_getter_syntax():
    """Check that string expressions are accepted as ``ContextWindow`` values.

    ``validate_arguments`` is applied to a helper whose ``context`` parameter
    is annotated with ``ContextWindow``; the helper is then called with
    string expressions and the text of the returned span is compared with
    the expected snippet around the "dog" entity.
    """

    @validate_arguments
    def get_snippet(span, context: ContextWindow):
        # `context` is called directly on the span, so validation must have
        # turned the string expression into a callable window.
        return context(span)

    pipeline = edsnlp.blank("eds")
    for pipe_name in ("eds.normalizer", "eds.sentences"):
        pipeline.add_pipe(pipe_name)
    pipeline.add_pipe("eds.matcher", config={"terms": {"dog": "dog"}})

    text = (
        "There was a snake. "
        "His friend was a dog. "
        "He liked baking cakes. "
        "But since he had no hands, he was a bad baker. "
    )
    doc = pipeline(text)

    # (window expression, expected text of the resulting span)
    expectations = [
        ("words[-5:5]", ". His friend was a dog. He liked baking cakes"),
        ("words[-5:5] & sent", "His friend was a dog"),
        (
            "words[-5:8] | sents[-1:1]",
            "There was a snake. His friend was a dog. He liked baking cakes. "
            "But since",
        ),
    ]

    for expression, expected_text in expectations:
        snippet = get_snippet(doc.ents[0], expression)
        assert snippet.text == expected_text
162
163
164
def test_invalid_context_getter_syntax():
    """Check that an unsupported window expression is rejected.

    A helper validated against ``ContextWindow`` is first called with the
    expression "sents[-2:2]", which must not raise, and then with
    "stuff[-2:2]", which must raise ``ValueError``.
    """

    @validate_arguments
    def accept_window(context: ContextWindow):
        # Only argument validation matters here; the body does nothing.
        return None

    # A well-formed expression goes through without error.
    accept_window("sents[-2:2]")

    # The same slice on a different keyword is expected to fail validation.
    with pytest.raises(ValueError):
        accept_window("stuff[-2:2]")