[cad161]: / notebooks / connectors / omop.md

Download this file

100 lines (77 with data), 1.4 kB


jupyter:
jupytext:
formats: md,ipynb
main_language: python
text_representation:
extension: .md
format_name: markdown
format_version: "1.3"
jupytext_version: 1.13.0
kernelspec:
display_name: "Python 3.9.5 64-bit ('.env': venv)"
name: python3


import context
import spacy
from edsnlp.connectors.omop import OmopConnector

OMOP connector

# Sample French text reused as the body of every document built here.
text = (
    "Le patient est arrivé le 23 août (23/08/2021). "
    "Il dit avoir eu mal au ventre hier. "
    "L'année dernière, on lui avait prescrit du doliprane."
)

# Blank French pipeline: a normalizer, then a matcher configured with a
# single regex entry named `word` whose pattern is (\w+).
matcher_config = {"regex": {"word": r"(\w+)"}}
nlp = spacy.blank("fr")
nlp.add_pipe("normalizer")
nlp.add_pipe("matcher", config=matcher_config)

# Stand-alone document tagged with note_id 0; `doc` is rebound by the loop
# that follows.
doc = nlp(text)
doc._.note_id = 0

# Ten documents: each text is prefixed with "Doc" and a zero-padded index,
# and the same index is stored as the document's note_id.
prefixed_texts = [f"Doc{i:02}" + text for i in range(10)]
docs = []
for i, prefixed in enumerate(prefixed_texts):
    doc = nlp(prefixed)
    doc._.note_id = i
    docs.append(doc)
# Export the documents through the connector, then rebuild documents from
# the two exported objects.
connector = OmopConnector(nlp)
note, note_nlp = connector.docs2omop(docs)
note
new_docs = connector.omop2docs(note, note_nlp)

# Compare the first rebuilt document with its original: text, entity count,
# then entity texts pairwise.
# NOTE(review): the bare expressions only display a value when they end a
# notebook cell; cell boundaries are not visible here.
new_docs[0].text == docs[0].text
len(docs[0].ents) == len(new_docs[0].ents)
assert all(
    rebuilt.text == original.text
    for rebuilt, original in zip(new_docs[0].ents, docs[0].ents)
)