[cad161]: / notebooks / endlines / endlines-example.md

Download this file

183 lines (141 with data), 2.9 kB


---
jupyter:
  jupytext:
    text_representation:
      extension: .md
      format_name: markdown
      format_version: "1.3"
      jupytext_version: 1.13.0
  kernelspec:
    display_name: "Python 3.7.1 64-bit ('env_debug': conda)"
    name: python3
---


%load_ext autoreload
%autoreload 2
import spacy
from edsnlp.pipelines.endlines.endlinesmodel import EndLinesModel
import pandas as pd
from spacy import displacy

Train

# Blank French pipeline used to turn the raw texts into Doc objects.
nlp = spacy.blank("fr")

# Three raw sample texts containing line breaks; they form the training corpus.
text = r"""Le patient est arrivé hier soir.
Il est accompagné par son fils

ANTECEDENTS
Il a fait une TS en 2010;
Fumeur, il est arreté il a 5 mois
Chirurgie de coeur en 2011
CONCLUSION
Il doit prendre
le medicament indiqué 3 fois par jour. Revoir médecin
dans 1 mois.
DIAGNOSTIC :

Antecedents Familiaux:
- 1. Père avec diabete

"""
text2 = """J'aime le \nfromage...\n"""
text3 = '\nIntervention(s) - acte(s) réalisé(s) :\nParathyroïdectomie élective le [DATE]'

# Process the texts in order and keep each Doc under its own name.
doc, doc2, doc3 = (nlp(sample) for sample in (text, text2, text3))
corpus = [doc, doc2, doc3]

# Fit the end-of-line model on the corpus; the call returns a DataFrame.
endlines = EndLinesModel(nlp=nlp)
df = endlines.fit_and_predict(corpus)
df.head()
# Show every column when a DataFrame is displayed. The fully-qualified option
# key is used because the short form "max_columns" is ambiguous on recent
# pandas (it also matches "styler.render.max_columns") and raises OptionError.
pd.set_option("display.max_columns", None)
# Save model
# PATH is passed explicitly so the model is written to the same location the
# "endlines" pipe is later configured to load from (model_path=PATH).
PATH = "/path_to_model"
endlines.save(PATH)

Predict

# Hand-built table with three rows to run through the fitted model.
# NOTE(review): the meaning of columns A1..A4 / B1..B2 is defined by
# EndLinesModel and is not visible in this notebook — confirm against its docs.
df2 = pd.DataFrame(
    dict(
        A1=[12646014, 4191891561709484510, 1668228190683662995],
        A2=[12646065887601541794, 4191891561709484510, 1668228190683662995],
        A3=["UPPER", "DIGIT", "sdf"],
        A4=["DIGIT", "ENUMERATION", "STRONG_PUNCT"],
        B1=[0.5, 0.7, 10.2],
        B2=[0.0, 0.2, -10.2],
        BLANK_LINE=[False, True, False],
    )
)
# Replace the input table with whatever predict() returns for it.
df2 = endlines.predict(df2)
df2

Set spans in the training data (for visualization)

# Apply the model's set_spans to the training docs using the fitted table.
# Presumably this fills doc.spans["new_lines"], which is read below — verify.
set_spans = endlines.set_spans
set_spans(corpus, df)

# Rows of the prediction table belonging to the document with DOC_ID 1.
df.loc[df["DOC_ID"] == 1]

# Take the second document and expose its "new_lines" spans as entities
# so that displacy's entity renderer can draw them.
doc_exemple = corpus[1]
doc_exemple.spans
doc_exemple.ents = tuple(doc_exemple.spans["new_lines"])
displacy.render(
    doc_exemple,
    style="ent",
    options={"colors": {"end_line": "green", "space": "red"}},
)

spaCy pipeline (inference)


# Fresh blank French pipeline with the "endlines" component, configured to
# load its model from PATH.
nlp = spacy.blank("fr")
nlp.add_pipe("endlines", config={"model_path": PATH})

# Run the three raw texts through the pipeline and inspect the second result.
docs2 = list(nlp.pipe([text, text2, text3]))
doc_exemple = docs2[1]
doc_exemple
from edsnlp.utils.filter import filter_spans

# Keep only the "new_lines" spans labelled "space", add them to the existing
# entities, and pass the combined tuple through filter_spans before assigning
# it back to doc.ents.
spaces = tuple(
    span for span in doc_exemple.spans["new_lines"] if span.label_ == "space"
)
ents = doc_exemple.ents + spaces
ents_f = filter_spans(ents)
doc_exemple.ents = ents_f
displacy.render(
    doc_exemple,
    style="ent",
    options={"colors": {"space": "red"}},
)