Switch to unified view

a b/notebooks/endlines/endlines-example.md
1
---
2
jupyter:
3
  jupytext:
4
    text_representation:
5
      extension: .md
6
      format_name: markdown
7
      format_version: "1.3"
8
      jupytext_version: 1.13.0
9
  kernelspec:
10
    display_name: "Python 3.7.1 64-bit ('env_debug': conda)"
11
    name: python3
12
---
13
14
```python
15
%load_ext autoreload
16
%autoreload 2
17
```
18
19
```python
20
import spacy
21
```
22
23
```python
24
from edsnlp.pipelines.endlines.endlinesmodel import EndLinesModel
25
```
26
27
```python
28
import pandas as pd
29
```
30
31
```python
32
from spacy import displacy
33
```
34
35
# Train
36
37
```python
38
nlp = spacy.blank("fr")
39
```
40
41
```python
42
text =  r"""Le patient est arrivé hier soir.
43
Il est accompagné par son fils
44
45
ANTECEDENTS
46
Il a fait une TS en 2010;
47
Fumeur, il est arreté il a 5 mois
48
Chirurgie de coeur en 2011
49
CONCLUSION
50
Il doit prendre
51
le medicament indiqué 3 fois par jour. Revoir médecin
52
dans 1 mois.
53
DIAGNOSTIC :
54
55
Antecedents Familiaux:
56
- 1. Père avec diabete
57
58
"""
59
```
60
61
```python
62
doc = nlp(text)
63
```
64
65
```python
66
text2 = """J'aime le \nfromage...\n"""
67
doc2 = nlp(text2)
68
```
69
70
```python
71
text3 = '\nIntervention(s) - acte(s) réalisé(s) :\nParathyroïdectomie élective le [DATE]'
72
doc3 = nlp(text3)
73
```
74
75
```python
76
corpus = [doc,doc2, doc3]
77
```
78
79
```python
80
endlines = EndLinesModel(nlp = nlp)
81
```
82
83
```python
84
df = endlines.fit_and_predict(corpus)
85
df.head()
86
```
87
88
```python
89
# Show every DataFrame column. Use the fully qualified option key: the short
# "max_columns" pattern can match more than one registered option.
pd.set_option("display.max_columns", None)
90
```
91
92
```python
93
# Save model
94
PATH= "/path_to_model"
95
# Write the trained model to PATH: the "endlines" pipe added later in this
# notebook is configured with model_path = PATH and must find the file there.
endlines.save(PATH)
96
```
97
98
# Predict
99
100
```python
101
# Hand-built three-row table that is passed to endlines.predict below.
# NOTE(review): the column names presumably have to match the ones produced by
# fit_and_predict — confirm against EndLinesModel.
df2 = pd.DataFrame({"A1":[12646014,4191891561709484510 , 1668228190683662995],
102
                   "A2":[12646065887601541794,4191891561709484510 , 1668228190683662995],
103
                   "A3": ["UPPER","DIGIT","sdf"],
104
                   "A4": ["DIGIT","ENUMERATION","STRONG_PUNCT"],
105
                   "B1": [.5,.7,10.2],
106
                   "B2": [.0,.2,-10.2],
107
                  "BLANK_LINE":[False,True,False]})
108
df2 = endlines.predict(df2)
109
df2
110
```
111
112
# Set spans in training data (for visualization)
113
114
```python
115
set_spans = endlines.set_spans
116
```
117
118
```python
119
set_spans(corpus, df)
120
```
121
122
```python
123
df.loc[df.DOC_ID==1]
124
```
125
126
```python
127
doc_exemple = corpus[1]
128
```
129
130
```python
131
doc_exemple.spans
132
```
133
134
```python
135
# Copy the "new_lines" spans into doc.ents so that the displacy call below
# (style="ent") has something to render.
doc_exemple.ents = tuple(doc_exemple.spans['new_lines'])
136
```
137
138
```python
139
displacy.render(doc_exemple, style="ent", options={"colors":{"end_line":"green","space":"red"}})
140
```
141
142
# spaCy pipeline (inference)
143
144
```python
145
146
```
147
148
```python
149
nlp = spacy.blank("fr")
150
```
151
152
```python
153
nlp.add_pipe("endlines", config=dict(model_path = PATH))
154
```
155
156
```python
157
docs2 = list(nlp.pipe([text,text2,text3]))
158
```
159
160
```python
161
doc_exemple = docs2[1]
162
```
163
164
```python
165
doc_exemple
166
```
167
168
```python
169
from edsnlp.utils.filter import filter_spans
170
# Keep only the "new_lines" spans whose label is "space".
spaces = tuple(s for s in doc_exemple.spans['new_lines'] if s.label_=="space")
171
ents = doc_exemple.ents + spaces
172
# NOTE(review): filter_spans presumably drops overlapping spans so the result
# can be assigned to doc.ents — confirm against edsnlp.utils.filter.
ents_f = filter_spans(ents)
173
doc_exemple.ents = ents_f
174
```
175
176
```python
177
displacy.render(doc_exemple, style="ent", options={"colors":{"space":"red"}})
178
```
179
180
```python
181
182
```