# Jupytext notebook metadata: pairs this Markdown file with an .ipynb
# notebook and records the kernel the notebook was last run with.
jupyter:
  jupytext:
    formats: md,ipynb
    main_language: python
    text_representation:
      extension: .md
      format_name: markdown
      format_version: "1.3"
      jupytext_version: 1.13.0
  kernelspec:
    display_name: "Python 3.9.5 64-bit ('.env': venv)"
    name: python3
import context
import spacy
from edsnlp.connectors.omop import OmopConnector
# Sample French note used throughout the notebook: three sentences
# concatenated into a single string (separating spaces are part of the
# literals themselves).
text = "".join(
    [
        "Le patient est arrivé le 23 août (23/08/2021). ",
        "Il dit avoir eu mal au ventre hier. ",
        "L'année dernière, on lui avait prescrit du doliprane.",
    ]
)
# Start from a blank "fr" spaCy pipeline and add two components by name.
# Neither "normalizer" nor "matcher" is defined in this file; presumably
# they are registered by edsnlp on import -- TODO confirm.
nlp = spacy.blank("fr")
nlp.add_pipe("normalizer")
nlp.add_pipe(
    "matcher",
    # A single regex pattern, keyed "word".
    config={"regex": {"word": r"(\w+)"}},
)

# Run the pipeline once on the sample text and tag the result with note id 0
# through the custom `note_id` extension attribute.
doc = nlp(text)
doc._.note_id = 0
# Build ten documents. Each one is the sample text prefixed with a
# zero-padded "DocNN" marker (no separator is inserted between the marker
# and the text) and carries its index as `note_id`.
docs = []
for i in range(10):
    doc = nlp(f"Doc{i:02}" + text)
    doc._.note_id = i
    docs.append(doc)
# Export the documents through the OMOP connector. `docs2omop` returns two
# objects, unpacked here as `note` and `note_nlp` (presumably the OMOP
# `note` and `note_nlp` tables -- TODO confirm against the connector docs).
connector = OmopConnector(nlp)
note, note_nlp = connector.docs2omop(docs)
note

# Convert the exported pair back into documents and compare the first
# rebuilt document with its original. The two bare comparisons are plain
# expressions (useful as notebook output); only the loop below asserts.
new_docs = connector.omop2docs(note, note_nlp)
new_docs[0].text == docs[0].text
len(docs[0].ents) == len(new_docs[0].ents)

# Entities must carry the same text, pairwise and in order. `zip` stops at
# the shorter sequence, so a length mismatch is not detected by this loop.
for e, o in zip(new_docs[0].ents, docs[0].ents):
    assert e.text == o.text