# Diff of /demo/app.py [000000] .. [cad161]
#
# Switch to unified view
#
# a b/demo/app.py
1
from typing import Any
2
3
import pandas as pd
4
import streamlit as st
5
from spacy import displacy
6
7
import edsnlp
8
import edsnlp.pipes as eds
9
from edsnlp.utils.filter import filter_spans
10
11
# Sample clinical note (in French) used as the initial content of the text area.
# A trailing backslash joins a line with the next one, so the literal neither
# starts nor ends with a newline.
# NOTE(review): "paracétomol" is presumably misspelled on purpose to showcase
# the fuzzy drug matcher — confirm before correcting it.
DEFAULT_TEXT = """\
Motif :
Le patient est admis le 29 août pour des difficultés respiratoires.

Antécédents familiaux :
Le père du patient n'est pas asthmatique.

HISTOIRE DE LA MALADIE
Le patient dit avoir de la toux depuis trois jours. \
Elle a empiré jusqu'à nécessiter un passage aux urgences.
A noter deux petits kystes bénins de 1 et 2cm biopsiés en 2005.

Priorité: 2 (établie par l'IAO à l'entrée)

adicaps ABCD0A12 et ABCD0A13

Conclusion
Possible infection au coronavirus. Prescription de paracétomol pour la fièvre.\
"""
30
31
# Template of the exported snippet that declares the custom RegEx matcher.
# `{custom_regex}` is filled in with `str.format`.
REGEX = """
# RegEx and terms matcher
nlp.add_pipe(
    eds.matcher(
        regex=dict(custom=r"{custom_regex}"),
        attr="NORM",
    ),
)
"""
40
41
# Template of the exported script. `{pipes}` receives the declarations of the
# enabled entity pipes (and the custom RegEx snippet), `{text}` receives the
# note as a Python string literal.
CODE = """
import edsnlp, edsnlp.pipes as eds

# Declare the pipeline
nlp = edsnlp.blank("eds")

# General-purpose components
nlp.add_pipe(eds.normalizer())
nlp.add_pipe(eds.sentences())
{pipes}
# Qualifier pipes
nlp.add_pipe(eds.negation())
nlp.add_pipe(eds.family())
nlp.add_pipe(eds.hypothesis())
nlp.add_pipe(eds.rspeech())

# Define the note text
text = {text}

# Apply the pipeline
doc = nlp(text)

# Explore matched elements
doc.ents
"""
66
67
# Maps the checkbox title shown in the sidebar to the component name used to
# build the pipeline. Insertion order is the order in which checkboxes are
# rendered and pipes are added.
PIPES = {
    "Drugs": "drugs",
    "CIM10": "cim10",
    "Dates": "dates",
    "Quantities": "quantities",
    "Charlson": "charlson",
    "SOFA": "sofa",
    "Elston & Ellis": "elston_ellis",
    "TNM": "tnm",
    "Priority": "emergency_priority",
    "CCMU": "emergency_ccmu",
    "GEMSA": "emergency_gemsa",
    "Covid": "covid",
    "Adicap": "adicap",
    "Diabetes": "diabetes",
    "Tobacco": "tobacco",
    "AIDS": "aids",
    "Lymphoma": "lymphoma",
    "Leukemia": "leukemia",
    "Solid Tumor": "solid_tumor",
    "CKD": "ckd",
    "Hemiplegia": "hemiplegia",
    "Liver Disease": "liver_disease",
    "Peptic Ulcer Disease": "peptic_ulcer_disease",
    "Connective Tissue Disease": "connective_tissue_disease",
    "COPD": "copd",
    "Dementia": "dementia",
    "Cerebrovascular Accident": "cerebrovascular_accident",
    "Peripheral Vascular Disease": "peripheral_vascular_disease",
    "Congestive Heart Failure": "congestive_heart_failure",
    "Myocardial Infarction": "myocardial_infarction",
    "Alcohol": "alcohol",
}
100
101
102
@st.cache_resource()
def load_model(custom_regex: str, **enabled):
    """
    Build the demo pipeline and the code lines that reproduce it.

    Parameters
    ----------
    custom_regex : str
        Regular expression matched under the `custom` label. An empty
        string disables the custom matcher.
    **enabled
        Boolean flags keyed by component name (the values of `PIPES`), plus
        `fuzzy_drugs`. A missing flag is treated as disabled.

    Returns
    -------
    tuple
        `(nlp, pipes, regex)`: the assembled pipeline, the list of code
        lines declaring the enabled entity pipes, and the rendered `REGEX`
        snippet (an empty string when `custom_regex` is empty).
    """
    pipes = []

    # Declare the pipeline
    nlp = edsnlp.blank("eds")
    nlp.add_pipe(eds.normalizer())
    nlp.add_pipe(eds.sentences())

    # Only the component names are needed here, not the checkbox titles.
    for name in PIPES.values():
        if not enabled.get(name, False):
            continue

        if name == "drugs":
            # The drugs pipe is the only one with a configurable matcher.
            if enabled.get("fuzzy_drugs", False):
                nlp.add_pipe(eds.drugs(term_matcher="simstring"))
                pipes.append('nlp.add_pipe(eds.drugs(term_matcher="simstring"))')
            else:
                nlp.add_pipe(eds.drugs())
                pipes.append("nlp.add_pipe(eds.drugs())")
        else:
            nlp.add_pipe(f"eds.{name}")
            pipes.append(f"nlp.add_pipe(eds.{name}())")

    if pipes:
        pipes.insert(0, "# Entity extraction pipes")

    if custom_regex:
        nlp.add_pipe(
            eds.matcher(
                regex=dict(custom=custom_regex),
                attr="NORM",
            ),
        )

        # NOTE(review): REGEX wraps the value in r"...", so a pattern that
        # contains a double quote yields an invalid exported snippet.
        regex = REGEX.format(custom_regex=custom_regex)
    else:
        regex = ""

    # Qualifier pipes come last.
    nlp.add_pipe(eds.negation())
    nlp.add_pipe(eds.family())
    nlp.add_pipe(eds.hypothesis())
    nlp.add_pipe(eds.rspeech())

    return nlp, pipes, regex
148
149
150
# Page-level configuration, title and the data-safety warning.
st.set_page_config(
    page_title="EDS-NLP Demo",
    page_icon="📄",
)

st.title("EDS-NLP")

st.warning(
    "You should **not** put sensitive data in the example, as this application "
    "**is not secure**."
)

# Sidebar: short presentation of the project.
st.sidebar.header("About")
st.sidebar.markdown(
    "EDS-NLP is a contributive effort maintained by AP-HP's Data Science team. "
    "Have a look at the "
    "[documentation](https://aphp.github.io/edsnlp/) for "
    "more information on the available components."
)
169
170
# Sidebar: description of the demo pipeline.
st.sidebar.header("Pipeline")
st.sidebar.markdown(
    "This example runs a simplistic pipeline detecting a few synonyms for "
    "COVID-related entities.\n\n"
    "You can add or remove pre-defined pipeline components, and see how "
    "the pipeline reacts. You can also search for your own custom RegEx."
)

# Sidebar: user-defined regular expression.
st.sidebar.header("Custom RegEx")
st_custom_regex = st.sidebar.text_input(
    "Regular Expression:",
    r"asthmatique|difficult[ée]s?\srespiratoires?",
)
st.sidebar.markdown("The RegEx you defined above is detected under the `custom` label.")

# Sidebar: one checkbox per component, stored under the component name.
st.sidebar.subheader("Pipeline Components")
st_pipes = {}

# CIM10 and Drugs get dedicated widgets: CIM10 is off by default and Drugs is
# laid out next to its "fuzzy" option.
st_pipes["cim10"] = st.sidebar.checkbox("CIM10 (loading can be slow)", value=False)
st_drugs_container = st.sidebar.columns([1, 2])
st_pipes["drugs"] = st_drugs_container[0].checkbox("Drugs", value=True)
st_fuzzy_drugs = st_drugs_container[1].checkbox(
    "Fuzzy drugs search", value=True, disabled=not st_pipes["drugs"]
)
for title, name in PIPES.items():
    # Already handled by the dedicated widgets above.
    if name in ("drugs", "cim10"):
        continue
    st_pipes[name] = st.sidebar.checkbox(title, value=True)
st.sidebar.markdown(
    "These are just a few of the components provided out-of-the-box by EDS-NLP. "
    "See the [documentation](https://aphp.github.io/edsnlp/latest/pipes/) "
    "for detail."
)
203
204
# Show a placeholder while the pipeline is being built, then clear it.
model_load_state = st.info("Loading model...")

nlp, pipes, regex = load_model(
    fuzzy_drugs=st_fuzzy_drugs,
    custom_regex=st_custom_regex,
    **st_pipes,
)

model_load_state.empty()

st.header("Enter a text to analyse:")
text = st.text_area(
    "Modify the following text and see the pipeline react :",
    DEFAULT_TEXT,
    height=375,
)

doc = nlp(text)
# Merge the "dates" and "quantities" span groups into the entities so that
# they are displayed too; filter_spans receives the combined tuple.
doc.ents = filter_spans(
    (*doc.ents, *doc.spans.get("dates", []), *doc.spans.get("quantities", []))
)
225
226
st.header("Visualisation")

st.markdown(
    "The pipeline extracts simple entities using a dictionary of RegEx (see the "
    "[Export the pipeline section](#export-the-pipeline) for more information)."
)

# Palette of 20 hex colours. Only the first `len(labels)` entries are used,
# because `zip` stops at the shorter sequence.
category20 = [
    "#1f77b4",
    "#aec7e8",
    "#ff7f0e",
    "#ffbb78",
    "#2ca02c",
    "#98df8a",
    "#d62728",
    "#ff9896",
    "#9467bd",
    "#c5b0d5",
    "#8c564b",
    "#c49c94",
    "#e377c2",
    "#f7b6d2",
    "#7f7f7f",
    "#c7c7c7",
    "#bcbd22",
    "#dbdb8d",
    "#17becf",
    "#9edae5",
]

# Entity labels that receive a dedicated colour, in palette order.
labels = [
    "date",
    "covid",
    "drug",
    "cim10",
    "emergency_priority",
    "sofa",
    "charlson",
    "size",
    "weight",
    "adicap",
]

colors = dict(zip(labels, category20))
colors["custom"] = "linear-gradient(90deg, #aa9cfc, #fc9ce7)"
options = {
    "colors": colors,
}

html = displacy.render(doc, style="ent", options=options)
# Tighten the line spacing of the rendered markup and wrap it in a bordered box.
html = html.replace("line-height: 2.5;", "line-height: 2.25;")
html = (
    '<div style="padding: 10px; border: solid 2px; border-radius: 10px; '
    f'border-color: #afc6e0;">{html}</div>'
)
st.write(html, unsafe_allow_html=True)
282
283
# One record per entity: character offsets, text, label, value and qualifiers.
data = []
for ent in doc.ents:
    d = dict(
        start=ent.start_char,
        end=ent.end_char,
        text=ent.text,
        label=ent.label_,
        # NOTE(review): `or ""` also blanks out falsy values such as 0 —
        # confirm this is intended.
        normalized_value=str(ent._.value or ""),
        negation="YES" if ent._.negation else "NO",
        family="YES" if ent._.family else "NO",
        hypothesis="YES" if ent._.hypothesis else "NO",
        reported_speech="YES" if ent._.reported_speech else "NO",
    )

    data.append(d)

st.header("Entity qualification")
300
301
302
def color_qualifiers(val: Any) -> str:
    """
    Add color to qualifiers.

    Parameters
    ----------
    val : Any
        DataFrame value

    Returns
    -------
    str
        CSS style: red for "NO", green for "YES", empty for any other value
    """
    if val == "NO":
        return "color: #dc3545;"
    if val == "YES":
        return "color: #198754;"
    # Any other cell is left unstyled.
    return ""
321
322
323
if data:
    df = pd.DataFrame.from_records(data)
    df["normalized_value"] = df["normalized_value"].replace({"None": ""})

    # `Styler.applymap` is deprecated since pandas 2.1 in favour of
    # `Styler.map`; fall back to the old name on earlier versions.
    styler = df.style
    apply_elementwise = getattr(styler, "map", None) or styler.applymap
    df = apply_elementwise(color_qualifiers)

    st.dataframe(df)

else:
    st.markdown("Your pipeline did not match any entity...")
333
334
# Assemble the optional part of the exported script: the entity pipes
# followed by the custom RegEx snippet.
pipes_text = ""

if pipes:
    pipes_text += "\n" + "\n".join(pipes) + "\n"
if regex:
    pipes_text += regex

# The note is embedded in a triple-quoted literal: escape backslashes and
# triple quotes so the exported script stays valid and keeps the same text.
escaped_text = text.replace("\\", "\\\\").replace('"""', '\\"\\"\\"')

code = CODE.format(
    pipes=pipes_text,
    text=f'"""\n{escaped_text}\n"""',
)

st.header("Export the pipeline")
st.markdown(
    "The code below recreates the pipeline. Copy and paste it "
    "in a Jupyter Notebook to interact with it."
)
with st.expander("Show the runnable code"):
    st.markdown(f"```python\n{code}\n```\n\nThis code runs as is.")