edsnlp / Git / [cad161] /tests/data/test

Models:

philipB/

edsnlp

Downloads: 1

[cad161]: / tests / data / test_conll.py

History

Download this file

39 lines (34 with data), 1.1 kB

from itertools import islice
from pathlib import Path

import pytest
from typing_extensions import Literal

import edsnlp


@pytest.mark.parametrize("num_cpu_workers", [0, 2])
@pytest.mark.parametrize("shuffle", ["dataset"])
def test_read_shuffle_loop(
    num_cpu_workers: int,
    shuffle: Literal["dataset", "fragment"],
):
    """Read the sample CoNLL-U file shuffled and looping, then check one token.

    The reader is created with ``loop=True`` and a fixed seed, six items are
    taken from it, and token 31 of the first item is compared against the
    annotations of the reference CoNLL line quoted in the body.
    """
    tests_root = Path(__file__).parent.parent.resolve()
    conll_path = tests_root / "training" / "rhapsodie_sample.conllu"

    stream = edsnlp.data.read_conll(
        conll_path,
        shuffle=shuffle,
        seed=42,
        loop=True,
    )
    stream = stream.set_processing(num_cpu_workers=num_cpu_workers)

    # ``loop=True`` is requested, so the iteration is bounded explicitly
    # with islice rather than by exhausting the stream.
    docs = list(islice(stream, 6))
    assert len(docs) == 6

    # Reference line for the token under test (1-based id 32 -> index 31):
    # 32	ce	ce	PRON	_	Gender=Masc|Number=Sing|Person=3|PronType=Dem	30	obl:arg	_	_  # noqa: E501
    expected = {
        "text": "ce",
        "lemma_": "ce",
        "pos_": "PRON",
        "dep_": "obl:arg",
        "morph": "Gender=Masc|Number=Sing|Person=3|PronType=Dem",
        "head": "profité",
    }
    token = docs[0][31]
    # Compare through str() so non-string attribute values (e.g. morph, head)
    # are checked by their textual form.
    for attr_name, wanted in expected.items():
        assert str(getattr(token, attr_name)) == wanted