[cad161]: / tests / utils / test_span_getters.py

Download this file

173 lines (144 with data), 5.4 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
import pytest
from confit import validate_arguments
import edsnlp
import edsnlp.pipes as eds
from edsnlp.utils.span_getters import (
ContextWindow,
get_spans,
make_span_context_getter,
validate_span_setter,
)
def test_span_context_getter(lang):
    """Check symmetric word/sentence context windows around matched entities.

    The pipeline matches the term "sentence"; for each window setting the
    text of the context span returned for every entity is compared with the
    expected snippet.

    `lang` is not defined in this module (presumably a pytest fixture
    supplying the language code -- confirm in conftest).
    """
    nlp = edsnlp.blank(lang)
    for component in ("eds.normalizer", "eds.sentences"):
        nlp.add_pipe(component)
    nlp.add_pipe("eds.matcher", config={"terms": {"sentence": "sentence"}})

    text = (
        "This is a sentence. "
        "This is another sentence. "
        "This is a third one. "
        "Last sentence."
    )
    doc = nlp(text)

    # (context_words, context_sents, expected snippet per entity)
    cases = [
        (
            2,
            1,
            [
                "This is a sentence. This",
                "This is another sentence. This",
                ". Last sentence.",
            ],
        ),
        (
            0,
            1,
            [
                "This is a sentence.",
                "This is another sentence.",
                "Last sentence.",
            ],
        ),
        (
            0,
            2,
            [
                "This is a sentence. This is another sentence.",
                "This is a sentence. This is another sentence. This is a third one.",
                "This is a third one. Last sentence.",
            ],
        ),
    ]

    for n_words, n_sents, expected in cases:
        context_of = make_span_context_getter(
            context_words=n_words,
            context_sents=n_sents,
        )
        snippets = [context_of(ent).text for ent in doc.ents]
        assert snippets == expected
def test_span_getter_on_span():
    """Check `get_spans` when it is handed a part of the document.

    The matcher stores "snake" and "dog" both in `ents` and in the "animals"
    span group. The getters are then applied to the first sentence and to
    the slice `doc[5:]`, and the string form of the result is compared.
    """
    nlp = edsnlp.blank("eds")
    nlp.add_pipe(eds.sentences())
    animal_matcher = eds.matcher(
        terms={"animal": ["snake", "dog"]},
        span_setter=["ents", "animals"],
    )
    nlp.add_pipe(animal_matcher)

    text = (
        "There was a snake. "
        "His friend was a dog. "
        "He liked baking cakes. "
        "But since he had no hands, he was a bad baker. "
    )
    doc = nlp(text)

    first_sentence = list(doc.sents)[0]
    tail = doc[5:]

    # (container to search, span getter spec, expected str() of the result)
    cases = [
        (first_sentence, "ents", "[snake]"),
        (first_sentence, "animals", "[snake]"),
        (tail, "animals", "[dog]"),
        (tail, "*", "[dog]"),
    ]
    for container, getter_spec, expected in cases:
        found = list(get_spans(container, validate_span_setter(getter_spec)))
        assert str(found) == expected
def test_span_context_getter_asymmetric(lang):
    """Check context windows given as ints and as (before, after) tuples.

    A single entity ("kangaroo") is matched; each case builds a context
    getter from `context_words` / `context_sents` and compares the text of
    the resulting span with the expected snippet.

    `lang` is not defined in this module (presumably a pytest fixture
    supplying the language code -- confirm in conftest).
    """
    nlp = edsnlp.blank(lang)
    for component in ("eds.normalizer", "eds.sentences"):
        nlp.add_pipe(component)
    nlp.add_pipe("eds.matcher", config={"terms": {"animal": "kangaroo"}})

    text = (
        "This is a sentence. "
        "This is another sentence with a kangaroo. "
        "This is a third one. "
        "Last sentence."
    )
    doc = nlp(text)

    # (context_words, context_sents, expected snippet for the only entity)
    cases = [
        (2, 0, "with a kangaroo. This"),
        ((2, 1), 0, "with a kangaroo."),
        ((1, 2), 0, "a kangaroo. This"),
        (
            0,
            (1, 2),
            "This is another sentence with a kangaroo. This is a third one.",
        ),
        (
            0,
            (2, 2),
            "This is a sentence. This is another sentence with a kangaroo. This is a third one.",  # noqa: E501
        ),
        (0, (1, 1), "This is another sentence with a kangaroo."),
        (
            (1000, 0),
            0,
            "This is a sentence. This is another sentence with a kangaroo",
        ),
        (
            (1000, 0),
            (1, 2),
            "This is a sentence. This is another sentence with a kangaroo. This is a third one.",  # noqa: E501
        ),
    ]

    for words_window, sents_window, expected in cases:
        context_of = make_span_context_getter(
            context_words=words_window,
            context_sents=sents_window,
        )
        snippets = [context_of(ent).text for ent in doc.ents]
        assert snippets == [expected]
def test_context_getter_syntax():
    """Check that string context expressions are accepted as `ContextWindow`.

    `get_snippet` is wrapped with `validate_arguments`, is called with a
    plain string for its `ContextWindow`-annotated parameter, and applies
    the received value to the span. The text of the returned span is
    compared with the expected snippet for each expression.
    """

    @validate_arguments
    def get_snippet(span, context: ContextWindow):
        return context(span)

    nlp = edsnlp.blank("eds")
    for component in ("eds.normalizer", "eds.sentences"):
        nlp.add_pipe(component)
    nlp.add_pipe("eds.matcher", config={"terms": {"dog": "dog"}})

    text = (
        "There was a snake. "
        "His friend was a dog. "
        "He liked baking cakes. "
        "But since he had no hands, he was a bad baker. "
    )
    doc = nlp(text)
    dog_ent = doc.ents[0]

    # (context expression, expected text of the resulting span)
    cases = [
        ("words[-5:5]", ". His friend was a dog. He liked baking cakes"),
        ("words[-5:5] & sent", "His friend was a dog"),
        (
            "words[-5:8] | sents[-1:1]",
            "There was a snake. His friend was a dog. He liked baking cakes. "
            "But since",
        ),
    ]
    for expression, expected in cases:
        snippet = get_snippet(dog_ent, expression)
        assert snippet.text == expected
def test_invalid_context_getter_syntax():
    """Check that an unrecognised context expression raises `ValueError`.

    The helper only exists so that `validate_arguments` processes its
    `ContextWindow`-annotated parameter; its body does nothing.
    """

    @validate_arguments
    def apply_context(context: ContextWindow):
        return None

    accepted_spec = "sents[-2:2]"
    rejected_spec = "stuff[-2:2]"

    # The accepted expression must go through without raising.
    apply_context(accepted_spec)

    with pytest.raises(ValueError):
        apply_context(rejected_spec)