[cad161]: / tests / data / test_conll.py

Download this file

39 lines (34 with data), 1.1 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
from itertools import islice
from pathlib import Path
import pytest
from typing_extensions import Literal
import edsnlp
@pytest.mark.parametrize("num_cpu_workers", [0, 2])
@pytest.mark.parametrize("shuffle", ["dataset"])
def test_read_shuffle_loop(
    num_cpu_workers: int,
    shuffle: Literal["dataset", "fragment"],
):
    """Read a CoNLL-U sample with shuffling and looping, then spot-check a token.

    The sample file is opened through ``edsnlp.data.read_conll`` with a fixed
    seed and ``loop=True``, processing is configured with the parametrized
    number of CPU workers, and exactly six items are pulled from the stream.
    The token at index 31 of the first item is then compared, attribute by
    attribute, against the values of the reference CoNLL-U row quoted below.

    Parameters
    ----------
    num_cpu_workers:
        Forwarded to ``set_processing(num_cpu_workers=...)``.
    shuffle:
        Forwarded to ``read_conll(shuffle=...)``; only ``"dataset"`` is
        exercised by the current parametrization.
    """
    # The fixture sits in the "training" directory, two levels above this file.
    tests_root = Path(__file__).parent.parent.resolve()
    conll_path = tests_root / "training" / "rhapsodie_sample.conllu"

    stream = edsnlp.data.read_conll(
        conll_path,
        shuffle=shuffle,
        seed=42,
        loop=True,
    )
    stream = stream.set_processing(num_cpu_workers=num_cpu_workers)

    # NOTE(review): with loop=True the stream presumably never ends on its
    # own, so islice is what bounds the iteration — confirm against edsnlp.
    docs = list(islice(stream, 6))
    assert len(docs) == 6

    # Reference row (1-based id 32, hence index 31 below):
    # 32 ce ce PRON _ Gender=Masc|Number=Sing|Person=3|PronType=Dem 30 obl:arg _ _ # noqa: E501
    token = docs[0][31]
    expected = {
        "text": "ce",
        "lemma_": "ce",
        "pos_": "PRON",
        "dep_": "obl:arg",
        "morph": "Gender=Masc|Number=Sing|Person=3|PronType=Dem",
        "head": "profité",
    }
    # Compare through str() so that non-string attribute values (morph, head)
    # are checked by their textual form.
    for name in expected:
        assert str(getattr(token, name)) == expected[name]