a b/notebooks/sections/testing.md
1
---
2
jupyter:
3
  jupytext:
4
    formats: ipynb,md
5
    text_representation:
6
      extension: .md
7
      format_name: markdown
8
      format_version: "1.3"
9
      jupytext_version: 1.11.4
10
  kernelspec:
11
    display_name: Python 3
12
    language: python
13
    name: python3
14
---
15
16
```python
17
%reload_ext autoreload
18
%autoreload 2
19
```
20
21
```python
22
import pandas as pd
23
```
24
25
```python
26
import os
27
```
28
29
```python
30
import context
31
```
32
33
```python
34
from edsnlp.utils.brat import BratConnector
35
```
36
37
```python
38
39
```
40
41
```python
42
import spacy
43
```
44
45
# Sections dataset
46
47
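This notebook uses [Ivan Lerner's work at EDS](https://gitlab.eds.aphp.fr/IvanL/section_dataset). Before running the cells below, make sure you have cloned that repository to `../../data/section_dataset/`, which is where the notebook looks for it.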
48
49
```python
50
# Relative path to the local clone of the section dataset.
data_dir = '../../data/section_dataset/'
51
```
52
53
```python
54
# Connector bound to the dataset directory; get_brat() is called on it in the next cell.
# NOTE(review): presumably reads the BRAT files found under data_dir — confirm.
brat = BratConnector(data_dir)
55
```
56
57
```python
58
# get_brat() returns a pair. Only `texts` is used in the cells below, through its
# `note_id` and `note_text` columns; `annotations` is not read afterwards.
texts, annotations = brat.get_brat()
59
```
60
61
```python
62
texts
63
```
64
65
```python
66
# Blank French pipeline: it contains no trained components, only the ones added below.
nlp = spacy.blank('fr')
67
```
68
69
```python
70
# Components run in the order they are added: 'normaliser' first, then 'sections'.
nlp.add_pipe('normaliser')
71
nlp.add_pipe('sections')
72
```
73
74
```python
75
# Work on a copy so that `texts` is not modified by the columns added below.
df = texts.copy()
76
```
77
78
```python
79
# Run the pipeline on every note text; the result is stored in a new 'doc' column.
df['doc'] = df.note_text.apply(nlp)
80
```
81
82
```python
83
def assign_id(row):
    """Copy the row's ``note_id`` onto the document stored in the same row.

    Intended for ``df.apply(assign_id, axis=1)``: the function mutates
    ``row.doc`` in place and returns ``None``.

    NOTE(review): relies on ``note_id`` being settable on ``doc._`` —
    presumably a custom extension registered by edsnlp; confirm.
    """
    row.doc._.note_id = row.note_id
85
```
86
87
```python
88
# Called row by row for its side effect only; the trailing semicolon hides the cell output.
df.apply(assign_id, axis=1);
89
```
90
91
```python
92
# For every document, build one record per entry of `doc._.section_titles`.
# The key order (lexical_variant, label, start, end) is kept as-is because the
# records are expanded into columns of the same names further down.
# NOTE(review): `start_char` / `end_char` are presumably spaCy span character
# offsets into the note text — confirm.
df['matches'] = df.doc.apply(
    lambda doc: [
        {
            'lexical_variant': title.text,
            'label': title.label_,
            'start': title.start_char,
            'end': title.end_char,
        }
        for title in doc._.section_titles
    ]
)
98
```
99
100
```python
101
# One row per match. A note whose match list is empty yields a single row with NaN in 'matches'.
df = df[['note_text', 'note_id', 'matches']].explode('matches')
102
```
103
104
```python
105
# Drop rows with missing values, e.g. the NaN rows left by explode for notes without any match.
df = df.dropna()
106
```
107
108
```python
109
# Expand each match dict into one column per key. The column list is written in
# the same order as the keys of the match dicts.
df[['lexical_variant', 'label', 'start', 'end']] = df.matches.apply(pd.Series)
110
```
111
112
```python
113
df = df.drop('matches', axis=1)
114
```
115
116
```python
117
df.head(20)
118
```
119
120
```python
121
# NOTE(review): the new names are presumably the ones the labelling tool expects — confirm.
df = df.rename(columns={'start': 'offset_begin', 'end': 'offset_end', 'label': 'label_value'})
122
```
123
124
```python
125
# label_name is a plain copy of label_value.
# NOTE(review): presumably both columns are required by the labelling tool — confirm.
df['label_name'] = df.label_value
126
```
127
128
```python
129
# Empty placeholders for the two modifier columns (this one and 'modifier_result').
# NOTE(review): presumably expected by the labelling tool — confirm.
df['modifier_type'] = ''
130
df['modifier_result'] = ''
131
```
132
133
```python
134
from ipywidgets import Output, Button, VBox, Layout, Text, HTML
135
from IPython.display import display
136
from labeltool.labelling import GlobalLabels, Labels, Labelling
137
138
# Output widget passed to Labelling below and displayed in the final cell.
out = Output()
139
```
140
141
```python
142
# Register one label per distinct value of `label_value` found in the matches.
# Every label uses the same colour and the same selection widget type.
labels = Labels()

for label in df.label_value.unique():
    labels.add(
        name=label,
        color='green',
        selection_type='button',
    )
148
```
149
150
```python
151
# Build the labelling session over the match table, using the labels declared
# above and the Output widget / IPython display function for rendering.
# NOTE(review): from_save=True presumably resumes a previous session from
# save_path when that file exists — confirm, and only load pickles you trust.
labeller = Labelling(
    df,
    save_path='testing.pickle',
    labels_dict=labels.dict,
    from_save=True,
    out=out,
    display=display,
)
159
```
160
161
```python
162
# Start the labelling session; `out` is the last expression of the cell, so the widget is shown.
# NOTE(review): presumably the session renders into `out` — confirm.
labeller.run()
163
out
164
```
165
166
```python
167
168
```