--- a +++ b/tests/pipelines/ner/test_umls.py @@ -0,0 +1,60 @@ +import pytest + +pytest.importorskip("umls_downloader", reason="umls_downloader package not found") + +import os # noqa: E402 +from pathlib import Path # noqa: E402 + +from edsnlp.core import PipelineProtocol # noqa: E402 +from edsnlp.pipelines.ner.umls.patterns import get_path, get_patterns # noqa: E402 +from edsnlp.utils.examples import parse_example # noqa: E402 + +examples = [ + "Grosse <ent umls=C0010200>toux</ent> : Le malade a été mordu " + "<ent umls=C1257901>par</ent> des <ent umls=C0002668>Amphibiens</ent>" + " sous le <ent umls=C0022742>genou</ent>" +] +pattern_config = {"lang": ["FRE"], "sources": ["MSHFRE"]} + + +@pytest.mark.skipif(not os.getenv("UMLS_API_KEY"), reason="No UMLS_API_KEY given") +def test_get_patterns(): + path, _, _ = get_path(pattern_config) + assert isinstance(path, Path) + + api_key = os.getenv("UMLS_API_KEY") + + try: + patterns = get_patterns(pattern_config) + except AttributeError: + if not api_key: + raise Exception( + "Missing UMLS_API_KEY env variable to download the UMLS. " + f"UMLS cannot be found at path: {path}.\nTo skip the test, " + f"uninstall the umls_downloader package." + ) + raise + + assert path.exists() + + assert len(patterns) == 48587 + + +@pytest.mark.skipif(not os.getenv("UMLS_API_KEY"), reason="No UMLS_API_KEY given") +def test_add_pipe(blank_nlp: PipelineProtocol): + path, _, _ = get_path(pattern_config) + if not path.exists(): + pytest.xfail( + "The umls_downloader package is installed but " + f"UMLS cannot be found at path: {path}" + ) + + blank_nlp.add_pipe("eds.umls", config=dict(pattern_config=pattern_config)) + + for text, entities in map(parse_example, examples): + doc = blank_nlp(text) + assert len(doc.ents) == len(entities) + + for ent, entity in zip(doc.ents, entities): + assert ent.text == text[entity.start_char : entity.end_char] + assert ent._.umls == ent.kb_id_ == entity.modifiers[0].value