edsnlp / Git / Diff of /demo/app.py

Models:
philipB/
edsnlp
Downloads: 1
Diff of /demo/app.py [000000] .. [cad161]
Switch to side-by-side view

--- a
+++ b/demo/app.py
@@ -0,0 +1,352 @@
+from typing import Any
+
+import pandas as pd
+import streamlit as st
+from spacy import displacy
+
+import edsnlp
+import edsnlp.pipes as eds
+from edsnlp.utils.filter import filter_spans
+
+DEFAULT_TEXT = """\
+Motif :
+Le patient est admis le 29 août pour des difficultés respiratoires.
+
+Antécédents familiaux :
+Le père du patient n'est pas asthmatique.
+
+HISTOIRE DE LA MALADIE
+Le patient dit avoir de la toux depuis trois jours. \
+Elle a empiré jusqu'à nécessiter un passage aux urgences.
+A noter deux petits kystes bénins de 1 et 2cm biopsiés en 2005.
+
+Priorité: 2 (établie par l'IAO à l'entrée)
+
+adicaps ABCD0A12 et ABCD0A13
+
+Conclusion
+Possible infection au coronavirus. Prescription de paracétomol pour la fièvre.\
+"""
+
+REGEX = """
+# RegEx and terms matcher
+nlp.add_pipe(
+    eds.matcher(
+        regex=dict(custom=r"{custom_regex}"),
+        attr="NORM",
+    ),
+)
+"""
+
+CODE = """
+import edsnlp, edsnlp.pipes as eds
+
+# Declare the pipeline
+nlp = edsnlp.blank("eds")
+
+# General-purpose components
+nlp.add_pipe(eds.normalizer())
+nlp.add_pipe(eds.sentences())
+{pipes}
+# Qualifier pipes
+nlp.add_pipe(eds.negation())
+nlp.add_pipe(eds.family())
+nlp.add_pipe(eds.hypothesis())
+nlp.add_pipe(eds.rspeech())
+
+# Define the note text
+text = {text}
+
+# Apply the pipeline
+doc = nlp(text)
+
+# Explore matched elements
+doc.ents
+"""
+
+PIPES = {
+    "Drugs": "drugs",
+    "CIM10": "cim10",
+    "Dates": "dates",
+    "Quantities": "quantities",
+    "Charlson": "charlson",
+    "SOFA": "sofa",
+    "Elston & Ellis": "elston_ellis",
+    "TNM": "tnm",
+    "Priority": "emergency_priority",
+    "CCMU": "emergency_ccmu",
+    "GEMSA": "emergency_gemsa",
+    "Covid": "covid",
+    "Adicap": "adicap",
+    "Diabetes": "diabetes",
+    "Tobacco": "tobacco",
+    "AIDS": "aids",
+    "Lymphoma": "lymphoma",
+    "Leukemia": "leukemia",
+    "Solid Tumor": "solid_tumor",
+    "CKD": "ckd",
+    "Hemiplegia": "hemiplegia",
+    "Liver Disease": "liver_disease",
+    "Peptic Ulcer Disease": "peptic_ulcer_disease",
+    "Connective Tissue Disease": "connective_tissue_disease",
+    "COPD": "copd",
+    "Dementia": "dementia",
+    "Cerebrovascular Accident": "cerebrovascular_accident",
+    "Peripheral Vascular Disease": "peripheral_vascular_disease",
+    "Congestive Heart Failure": "congestive_heart_failure",
+    "Myocardial Infarction": "myocardial_infarction",
+    "Alcohol": "alcohol",
+}
+
+
+@st.cache_resource()
+def load_model(custom_regex: str, **enabled):
+    pipes = []
+
+    # Declare the pipeline
+    nlp = edsnlp.blank("eds")
+    nlp.add_pipe(eds.normalizer())
+    nlp.add_pipe(eds.sentences())
+
+    for title, name in PIPES.items():
+        if name == "drugs":
+            if enabled["drugs"]:
+                if enabled["fuzzy_drugs"]:
+                    nlp.add_pipe(eds.drugs(term_matcher="simstring"))
+                    pipes.append('nlp.add_pipe(eds.drugs(term_matcher="simstring"))')
+                else:
+                    nlp.add_pipe(eds.drugs())
+                    pipes.append("nlp.add_pipe(eds.drugs())")
+
+        else:
+            if enabled[name]:
+                nlp.add_pipe(f"eds.{name}")
+                pipes.append(f"nlp.add_pipe(eds.{name}())")
+
+    if pipes:
+        pipes.insert(0, "# Entity extraction pipes")
+
+    if custom_regex:
+        nlp.add_pipe(
+            eds.matcher(
+                regex=dict(custom=custom_regex),
+                attr="NORM",
+            ),
+        )
+
+        regex = REGEX.format(custom_regex=custom_regex)
+
+    else:
+        regex = ""
+
+    nlp.add_pipe(eds.negation())
+    nlp.add_pipe(eds.family())
+    nlp.add_pipe(eds.hypothesis())
+    nlp.add_pipe(eds.rspeech())
+
+    return nlp, pipes, regex
+
+
+st.set_page_config(
+    page_title="EDS-NLP Demo",
+    page_icon="📄",
+)
+
+st.title("EDS-NLP")
+
+st.warning(
+    "You should **not** put sensitive data in the example, as this application "
+    "**is not secure**."
+)
+
+st.sidebar.header("About")
+st.sidebar.markdown(
+    "EDS-NLP is a contributive effort maintained by AP-HP's Data Science team. "
+    "Have a look at the "
+    "[documentation](https://aphp.github.io/edsnlp/) for "
+    "more information on the available components."
+)
+
+st.sidebar.header("Pipeline")
+st.sidebar.markdown(
+    "This example runs a simplistic pipeline detecting a few synonyms for "
+    "COVID-related entities.\n\n"
+    "You can add or remove pre-defined pipeline components, and see how "
+    "the pipeline reacts. You can also search for your own custom RegEx."
+)
+
+st.sidebar.header("Custom RegEx")
+st_custom_regex = st.sidebar.text_input(
+    "Regular Expression:",
+    r"asthmatique|difficult[ée]s?\srespiratoires?",
+)
+st.sidebar.markdown("The RegEx you defined above is detected under the `custom` label.")
+
+st.sidebar.subheader("Pipeline Components")
+st_pipes = {}
+
+st_pipes["cim10"] = st.sidebar.checkbox("CIM10 (loading can be slow)", value=False)
+st_drugs_container = st.sidebar.columns([1, 2])
+st_pipes["drugs"] = st_drugs_container[0].checkbox("Drugs", value=True)
+st_fuzzy_drugs = st_drugs_container[1].checkbox(
+    "Fuzzy drugs search", value=True, disabled=not st_pipes["drugs"]
+)
+for title, name in PIPES.items():
+    if name == "drugs" or name == "cim10":
+        continue
+    st_pipes[name] = st.sidebar.checkbox(title, value=True)
+st.sidebar.markdown(
+    "These are just a few of the components provided out-of-the-box by EDS-NLP. "
+    "See the [documentation](https://aphp.github.io/edsnlp/latest/pipes/) "
+    "for detail."
+)
+
+model_load_state = st.info("Loading model...")
+
+nlp, pipes, regex = load_model(
+    fuzzy_drugs=st_fuzzy_drugs,
+    custom_regex=st_custom_regex,
+    **st_pipes,
+)
+
+model_load_state.empty()
+
+st.header("Enter a text to analyse:")
+text = st.text_area(
+    "Modify the following text and see the pipeline react :",
+    DEFAULT_TEXT,
+    height=375,
+)
+
+doc = nlp(text)
+doc.ents = filter_spans(
+    (*doc.ents, *doc.spans.get("dates", []), *doc.spans.get("quantities", []))
+)
+
+st.header("Visualisation")
+
+st.markdown(
+    "The pipeline extracts simple entities using a dictionnary of RegEx (see the "
+    "[Export the pipeline section](#export-the-pipeline) for more information)."
+)
+
+category20 = [
+    "#1f77b4",
+    "#aec7e8",
+    "#ff7f0e",
+    "#ffbb78",
+    "#2ca02c",
+    "#98df8a",
+    "#d62728",
+    "#ff9896",
+    "#9467bd",
+    "#c5b0d5",
+    "#8c564b",
+    "#c49c94",
+    "#e377c2",
+    "#f7b6d2",
+    "#7f7f7f",
+    "#c7c7c7",
+    "#bcbd22",
+    "#dbdb8d",
+    "#17becf",
+    "#9edae5",
+]
+
+labels = [
+    "date",
+    "covid",
+    "drug",
+    "cim10",
+    "emergency_priority",
+    "sofa",
+    "charlson",
+    "size",
+    "weight",
+    "adicap",
+]
+
+colors = {label: cat for label, cat in zip(labels, category20)}
+colors["custom"] = "linear-gradient(90deg, #aa9cfc, #fc9ce7)"
+options = {
+    "colors": colors,
+}
+
+html = displacy.render(doc, style="ent", options=options)
+html = html.replace("line-height: 2.5;", "line-height: 2.25;")
+html = (
+    '<div style="padding: 10px; border: solid 2px; border-radius: 10px; '
+    f'border-color: #afc6e0;">{html}</div>'
+)
+st.write(html, unsafe_allow_html=True)
+
+data = []
+for ent in doc.ents:
+    d = dict(
+        start=ent.start_char,
+        end=ent.end_char,
+        text=ent.text,
+        label=ent.label_,
+        normalized_value=str(ent._.value or ""),
+        negation="YES" if ent._.negation else "NO",
+        family="YES" if ent._.family else "NO",
+        hypothesis="YES" if ent._.hypothesis else "NO",
+        reported_speech="YES" if ent._.reported_speech else "NO",
+    )
+
+    data.append(d)
+
+st.header("Entity qualification")
+
+
+def color_qualifiers(val: Any) -> str:
+    """
+    Add color to qualifiers.
+
+    Parameters
+    ----------
+    val : Any
+        DataFrame value
+
+    Returns
+    -------
+    str
+        style
+    """
+    if val == "NO":
+        return "color: #dc3545;"
+    elif val == "YES":
+        return "color: #198754;"
+    return ""
+
+
+if data:
+    df = pd.DataFrame.from_records(data)
+    df.normalized_value = df.normalized_value.replace({"None": ""})
+
+    df = df.style.applymap(color_qualifiers)
+
+    st.dataframe(df)
+
+else:
+    st.markdown("You pipeline did not match any entity...")
+
+pipes_text = ""
+
+if pipes:
+    pipes_text += "\n" + "\n".join(pipes) + "\n"
+if regex:
+    pipes_text += regex
+
+code = CODE.format(
+    pipes=pipes_text,
+    text=f'"""\n{text}\n"""',
+)
+
+st.header("Export the pipeline")
+st.markdown(
+    "The code below recreates the pipeline. Copy and paste it "
+    "in a Jupyter Notebook to interact with it."
+)
+with st.expander("Show the runnable code"):
+    st.markdown(f"```python\n{code}\n```\n\nThis code runs as is.")