edsnlp / Git / [cad161] /tests/data/test

Models:

philipB/

edsnlp

Downloads: 1

[cad161]: / tests / data / test_spark.py

History

Download this file

30 lines (24 with data), 877 Bytes

import edsnlp


def test_read_write(blank_nlp, text, df_notes_pyspark):
    """Round-trip notes through the Spark reader and writer.

    The notes in ``df_notes_pyspark`` are read with the ``"omop"`` converter,
    the first document's text is compared with ``text``, then the documents
    are run through a matcher + negation pipeline and written back to Spark.
    The written frame is expected to hold 20 rows and 20 entities in total.
    """
    # The shuffle setting is tied to the pipeline language purely to vary
    # parameters without multiplying the number of test cases.
    if blank_nlp.lang == "eds":
        shuffle = "dataset"
    else:
        shuffle = False

    stream = edsnlp.data.from_spark(
        df_notes_pyspark,
        converter="omop",
        nlp=blank_nlp,
        shuffle=shuffle,
    )
    reader = stream.set_processing(backend="simple")
    first_doc = list(reader)[0]
    assert first_doc.text == text

    # Add the components whose output the writer will export.
    matcher_config = {"terms": {"douleur": ["douleurs"]}}
    blank_nlp.add_pipe("eds.matcher", config=matcher_config)
    blank_nlp.add_pipe("eds.negation")
    processed = blank_nlp.pipe(reader)

    spark_frame = edsnlp.data.to_spark(
        processed,
        converter="omop",
        span_attributes=["negation"],
        span_getter=["ents"],
    )
    records = spark_frame.toPandas().to_dict(orient="records")
    assert len(records) == 20

    # Count the exported entities across all rows.
    entity_total = 0
    for record in records:
        entity_total += len(record["entities"])
    assert entity_total == 20