"""Streamlit demo application for EDS-NLP (``demo/app.py``).

The script builds an EDS-NLP pipeline from the components ticked in the
sidebar, runs it on a user-editable clinical note, renders the matched
entities with displaCy, tabulates their qualifiers and finally shows a code
snippet that recreates the same pipeline.
"""

from typing import Any

import pandas as pd
import streamlit as st
from spacy import displacy

import edsnlp
import edsnlp.pipes as eds
from edsnlp.utils.filter import filter_spans

# Sample note shown in the text area on first load.
DEFAULT_TEXT = """\
Motif :
Le patient est admis le 29 août pour des difficultés respiratoires.

Antécédents familiaux :
Le père du patient n'est pas asthmatique.

HISTOIRE DE LA MALADIE
Le patient dit avoir de la toux depuis trois jours. \
Elle a empiré jusqu'à nécessiter un passage aux urgences.
A noter deux petits kystes bénins de 1 et 2cm biopsiés en 2005.

Priorité: 2 (établie par l'IAO à l'entrée)

adicaps ABCD0A12 et ABCD0A13

Conclusion
Possible infection au coronavirus. Prescription de paracétomol pour la fièvre.\
"""

# Snippet template for the custom matcher. ``{custom_regex}`` receives a
# complete Python string literal (quotes included), see ``_regex_literal``.
REGEX = """
# RegEx and terms matcher
nlp.add_pipe(
    eds.matcher(
        regex=dict(custom={custom_regex}),
        attr="NORM",
    ),
)
"""

# Template of the exported, runnable script. ``{text}`` receives a complete
# Python string literal (quotes included), see ``_text_literal``.
CODE = """
import edsnlp, edsnlp.pipes as eds

# Declare the pipeline
nlp = edsnlp.blank("eds")

# General-purpose components
nlp.add_pipe(eds.normalizer())
nlp.add_pipe(eds.sentences())
{pipes}
# Qualifier pipes
nlp.add_pipe(eds.negation())
nlp.add_pipe(eds.family())
nlp.add_pipe(eds.hypothesis())
nlp.add_pipe(eds.rspeech())

# Define the note text
text = {text}

# Apply the pipeline
doc = nlp(text)

# Explore matched elements
doc.ents
"""

# Sidebar checkbox title -> EDS-NLP component name (without the "eds." prefix).
PIPES = {
    "Drugs": "drugs",
    "CIM10": "cim10",
    "Dates": "dates",
    "Quantities": "quantities",
    "Charlson": "charlson",
    "SOFA": "sofa",
    "Elston & Ellis": "elston_ellis",
    "TNM": "tnm",
    "Priority": "emergency_priority",
    "CCMU": "emergency_ccmu",
    "GEMSA": "emergency_gemsa",
    "Covid": "covid",
    "Adicap": "adicap",
    "Diabetes": "diabetes",
    "Tobacco": "tobacco",
    "AIDS": "aids",
    "Lymphoma": "lymphoma",
    "Leukemia": "leukemia",
    "Solid Tumor": "solid_tumor",
    "CKD": "ckd",
    "Hemiplegia": "hemiplegia",
    "Liver Disease": "liver_disease",
    "Peptic Ulcer Disease": "peptic_ulcer_disease",
    "Connective Tissue Disease": "connective_tissue_disease",
    "COPD": "copd",
    "Dementia": "dementia",
    "Cerebrovascular Accident": "cerebrovascular_accident",
    "Peripheral Vascular Disease": "peripheral_vascular_disease",
    "Congestive Heart Failure": "congestive_heart_failure",
    "Myocardial Infarction": "myocardial_infarction",
    "Alcohol": "alcohol",
}


def _regex_literal(pattern: str) -> str:
    """
    Render a regular expression as Python source code.

    A raw string is used whenever it is guaranteed to parse, because it keeps
    the pattern readable. Patterns containing a double quote, ending with a
    backslash or holding non-printable characters fall back to ``repr``.

    Parameters
    ----------
    pattern : str
        Regular expression typed by the user

    Returns
    -------
    str
        Python string literal, quotes included
    """
    raw_is_safe = (
        '"' not in pattern
        and not pattern.endswith("\\")
        and pattern.isprintable()
    )
    return f'r"{pattern}"' if raw_is_safe else repr(pattern)


def _text_literal(text: str) -> str:
    """
    Render the note as Python source code.

    The readable triple-quoted form is kept when possible. A note containing
    a backslash (which would be read as an escape sequence) or a triple quote
    (which would end the literal early) falls back to ``repr`` of the very
    same value, so both forms evaluate to the same string.

    Parameters
    ----------
    text : str
        Note typed by the user

    Returns
    -------
    str
        Python string literal, quotes included
    """
    if "\\" in text or '"""' in text:
        return repr(f"\n{text}\n")
    return f'"""\n{text}\n"""'


@st.cache_resource()
def load_model(custom_regex: str, **enabled):
    """
    Build the EDS-NLP pipeline matching the sidebar selection.

    Parameters
    ----------
    custom_regex : str
        User-defined regular expression, matched under the ``custom`` label.
        The custom matcher is skipped when the string is empty.
    **enabled
        Flags keyed by component name. One entry is read for every value of
        ``PIPES``; ``fuzzy_drugs`` is read only when ``drugs`` is enabled.

    Returns
    -------
    tuple
        ``(nlp, pipes, regex)``: the pipeline, the source lines declaring
        the entity extraction components, and the source snippet declaring
        the custom matcher (empty string when there is none).
    """
    pipes = []

    # Declare the pipeline
    nlp = edsnlp.blank("eds")
    nlp.add_pipe(eds.normalizer())
    nlp.add_pipe(eds.sentences())

    for name in PIPES.values():
        if not enabled[name]:
            continue

        if name == "drugs":
            if enabled["fuzzy_drugs"]:
                nlp.add_pipe(eds.drugs(term_matcher="simstring"))
                pipes.append('nlp.add_pipe(eds.drugs(term_matcher="simstring"))')
            else:
                nlp.add_pipe(eds.drugs())
                pipes.append("nlp.add_pipe(eds.drugs())")
        else:
            nlp.add_pipe(f"eds.{name}")
            pipes.append(f"nlp.add_pipe(eds.{name}())")

    if pipes:
        pipes.insert(0, "# Entity extraction pipes")

    if custom_regex:
        nlp.add_pipe(
            eds.matcher(
                regex=dict(custom=custom_regex),
                attr="NORM",
            ),
        )

        # Emit a literal that is valid Python whatever the user typed
        regex = REGEX.format(custom_regex=_regex_literal(custom_regex))

    else:
        regex = ""

    # Qualifiers are added last, after every entity extraction component
    nlp.add_pipe(eds.negation())
    nlp.add_pipe(eds.family())
    nlp.add_pipe(eds.hypothesis())
    nlp.add_pipe(eds.rspeech())

    return nlp, pipes, regex


st.set_page_config(
    page_title="EDS-NLP Demo",
    page_icon="📄",
)

st.title("EDS-NLP")

st.warning(
    "You should **not** put sensitive data in the example, as this application "
    "**is not secure**."
)

st.sidebar.header("About")
st.sidebar.markdown(
    "EDS-NLP is a contributive effort maintained by AP-HP's Data Science team. "
    "Have a look at the "
    "[documentation](https://aphp.github.io/edsnlp/) for "
    "more information on the available components."
)

st.sidebar.header("Pipeline")
st.sidebar.markdown(
    "This example runs a simplistic pipeline detecting a few synonyms for "
    "COVID-related entities.\n\n"
    "You can add or remove pre-defined pipeline components, and see how "
    "the pipeline reacts. You can also search for your own custom RegEx."
)

st.sidebar.header("Custom RegEx")
st_custom_regex = st.sidebar.text_input(
    "Regular Expression:",
    r"asthmatique|difficult[ée]s?\srespiratoires?",
)
st.sidebar.markdown("The RegEx you defined above is detected under the `custom` label.")

st.sidebar.subheader("Pipeline Components")
st_pipes = {}

# CIM10 and drugs get dedicated widgets; every other component gets a plain
# checkbox in the loop below.
st_pipes["cim10"] = st.sidebar.checkbox("CIM10 (loading can be slow)", value=False)
st_drugs_container = st.sidebar.columns([1, 2])
st_pipes["drugs"] = st_drugs_container[0].checkbox("Drugs", value=True)
st_fuzzy_drugs = st_drugs_container[1].checkbox(
    "Fuzzy drugs search", value=True, disabled=not st_pipes["drugs"]
)
for title, name in PIPES.items():
    if name in ("drugs", "cim10"):
        continue
    st_pipes[name] = st.sidebar.checkbox(title, value=True)
st.sidebar.markdown(
    "These are just a few of the components provided out-of-the-box by EDS-NLP. "
    "See the [documentation](https://aphp.github.io/edsnlp/latest/pipes/) "
    "for detail."
)

model_load_state = st.info("Loading model...")

nlp, pipes, regex = load_model(
    fuzzy_drugs=st_fuzzy_drugs,
    custom_regex=st_custom_regex,
    **st_pipes,
)

model_load_state.empty()

st.header("Enter a text to analyse:")
text = st.text_area(
    "Modify the following text and see the pipeline react :",
    DEFAULT_TEXT,
    height=375,
)

doc = nlp(text)
# Merge the "dates" and "quantities" span groups into the entities so that
# they are displayed too; filter_spans is applied to the combined spans.
doc.ents = filter_spans(
    (*doc.ents, *doc.spans.get("dates", []), *doc.spans.get("quantities", []))
)

st.header("Visualisation")

st.markdown(
    "The pipeline extracts simple entities using a dictionary of RegEx (see the "
    "[Export the pipeline section](#export-the-pipeline) for more information)."
)

category20 = [
    "#1f77b4",
    "#aec7e8",
    "#ff7f0e",
    "#ffbb78",
    "#2ca02c",
    "#98df8a",
    "#d62728",
    "#ff9896",
    "#9467bd",
    "#c5b0d5",
    "#8c564b",
    "#c49c94",
    "#e377c2",
    "#f7b6d2",
    "#7f7f7f",
    "#c7c7c7",
    "#bcbd22",
    "#dbdb8d",
    "#17becf",
    "#9edae5",
]

labels = [
    "date",
    "covid",
    "drug",
    "cim10",
    "emergency_priority",
    "sofa",
    "charlson",
    "size",
    "weight",
    "adicap",
]

# One palette colour per label, in order; the custom label gets a gradient.
colors = dict(zip(labels, category20))
colors["custom"] = "linear-gradient(90deg, #aa9cfc, #fc9ce7)"
options = {
    "colors": colors,
}

html = displacy.render(doc, style="ent", options=options)
html = html.replace("line-height: 2.5;", "line-height: 2.25;")
html = (
    '<div style="padding: 10px; border: solid 2px; border-radius: 10px; '
    f'border-color: #afc6e0;">{html}</div>'
)
st.write(html, unsafe_allow_html=True)

# One record per entity, with its qualifiers rendered as YES/NO
data = [
    dict(
        start=ent.start_char,
        end=ent.end_char,
        text=ent.text,
        label=ent.label_,
        normalized_value=str(ent._.value or ""),
        negation="YES" if ent._.negation else "NO",
        family="YES" if ent._.family else "NO",
        hypothesis="YES" if ent._.hypothesis else "NO",
        reported_speech="YES" if ent._.reported_speech else "NO",
    )
    for ent in doc.ents
]

st.header("Entity qualification")


def color_qualifiers(val: Any) -> str:
    """
    Add color to qualifiers.

    Parameters
    ----------
    val : Any
        DataFrame value

    Returns
    -------
    str
        CSS style: red for "NO", green for "YES", empty string otherwise
    """
    if val == "NO":
        return "color: #dc3545;"
    elif val == "YES":
        return "color: #198754;"
    return ""


if data:
    df = pd.DataFrame.from_records(data)
    df.normalized_value = df.normalized_value.replace({"None": ""})

    styler = df.style
    # Styler.applymap was renamed to Styler.map in pandas 2.1; use whichever
    # the installed version provides.
    style_cells = styler.map if hasattr(styler, "map") else styler.applymap

    st.dataframe(style_cells(color_qualifiers))

else:
    st.markdown("Your pipeline did not match any entity...")

pipes_text = ""

if pipes:
    pipes_text += "\n" + "\n".join(pipes) + "\n"
if regex:
    pipes_text += regex

code = CODE.format(
    pipes=pipes_text,
    text=_text_literal(text),
)

st.header("Export the pipeline")
st.markdown(
    "The code below recreates the pipeline. Copy and paste it "
    "in a Jupyter Notebook to interact with it."
)
with st.expander("Show the runnable code"):
    st.markdown(f"```python\n{code}\n```\n\nThis code runs as is.")