EHRKit-2022 / Git / Diff of /wrapper_functions/medspacy

Models:
philipB/
EHRKit-2022
Downloads: 1
Diff of /wrapper_functions/medspacy_functions.py [000000] .. [2d4573]
Switch to side-by-side view

--- a
+++ b/wrapper_functions/medspacy_functions.py
@@ -0,0 +1,192 @@
+# pip install medspacy==0.2.0.0
+
+
+import spacy
+import medspacy
+from medspacy.util import DEFAULT_PIPENAMES
+from medspacy.custom_tokenizer import create_medspacy_tokenizer
+import medspacy
+from medspacy.section_detection import Sectionizer
+from medspacy.section_detection import SectionRule
+
+import warnings
+warnings.filterwarnings("ignore")
+
+
+
+def get_word_tokenization(text):
+    """
+    returns a list of strings of tokenized word from a input string
+
+    example:
+    >> get_word_tokenization("'''Admission Date:  [**2573-5-30**] ")
+    >> ['Admission', 'Date', ':', '[', '*', '*', '2573', '-', '5',...]
+    """
+
+    # logging
+    print(f"Getting sentence tokenizer from medspaCy\n")
+
+    nlp = spacy.blank("en")
+
+    medspacy_tokenizer = create_medspacy_tokenizer(nlp)
+
+    print(f"Input text (truncated): {text}\n...")
+    results = list(medspacy_tokenizer(text))
+
+    'filter out empty elements'
+    tokenized = [element.text.strip() for element in results if len(element.text.strip()) > 0]
+
+    return tokenized
+
+
+def get_section_detection(text,rules=None):
+    '''
+    given a string as the input, extract sections, consisting of medical history, allergies, comments and so on
+    :param text: a string
+    :rules: the personalized rules, a dictionary of string, i.e., {"category": "allergies"}
+    :return: a list of spacy Section object.
+
+
+    Example usage:
+    >> text1 = 'Past Medical History:
+    pt has history of medical events
+    Comments: some comment here
+
+    Allergies: apple, seafood
+    peanuts'
+
+    >> get_section_detection(text1)
+    >>
+    CATEGORY.............. past_medical_history
+    TITLE................. Past Medical History:
+    PARENT................ None
+    SECTION TEXT..........
+
+    pt has history of medical events
+
+    ----------------------
+    CATEGORY.............. comments
+    TITLE................. Comments:
+    PARENT................ None
+    SECTION TEXT..........
+    some comment here
+
+
+    ----------------------
+    CATEGORY.............. allergies
+    TITLE................. Allergies:
+    PARENT................ None
+    SECTION TEXT..........
+    apple, seafood
+    peanuts
+    '''
+    nlp = medspacy.load()
+    sectionizer = Sectionizer(nlp, rules=None)
+    pattern_dicts = [{"category": "past_medical_history", "literal": "Past Medical History:"},
+                     {"category": "allergies", "literal": "Allergies:"},
+                     {"category": "medical_assessment", "literal": "Medical Assessment:"},
+                     {"category": "comment", "literal": "Comments:", "parents": ["past_medical_history", "allergies"]}]
+    if rules is not None:
+        # combine with personalized rule
+        pattern_dicts.append(rules)
+    patterns = [SectionRule.from_dict(pattern) for pattern in pattern_dicts]
+    sectionizer.add(patterns)
+    nlp.add_pipe("medspacy_sectionizer")
+    doc = nlp(text)
+    sections = []
+
+    print(f"Getting section detection function from medspaCy\n")
+    for section in doc._.sections:
+        print("CATEGORY.............. {0}".format(section.category))
+        print("TITLE................. {0}".format(section.title_span))
+        if section.parent:
+            print("PARENT................ {0}".format(section.parent.category))
+        else:
+            print("PARENT................ {0}".format(section.parent))
+        print("SECTION TEXT..........\n{0}".format(section.body_span))
+        print("----------------------")
+        sections.append(section)
+
+    return sections
+
+def get_UMLS_match(text):
+
+    '''
+    Match the UMLS concept for the input text.
+    :param text: a string
+    :return: a list of tuples, (entity_text, label, similarity, semtypes)
+
+    Example:
+    >> concept_text = 'Decreased dipalmitoyllecithin content found in lung specimens'
+    >> get_UMLS_match(concept_type)
+    >>
+    Entity text : dipalmitoyllecithin
+    Label (UMLS CUI) : C0000039
+    Similarity : 0.8888888888888888
+    Semtypes : {'T119', 'T121'}
+    '''
+
+    medspacy_pipes = DEFAULT_PIPENAMES.copy()
+
+    if 'medspacy_quickumls' not in medspacy_pipes:
+        medspacy_pipes.add('medspacy_quickumls')
+
+
+    nlp = medspacy.load(enable=medspacy_pipes)
+
+
+    doc = nlp(text)
+
+    umls = []
+
+    print(f"Getting UMLS matching from medspaCy\n")
+    for ent in doc.ents:
+        print('Entity text : {}'.format(ent.text))
+        print('Label (UMLS CUI) : {}'.format(ent.label_))
+        print('Similarity : {}'.format(ent._.similarity))
+        print('Semtypes : {}'.format(ent._.semtypes))
+        umls.append((ent.text,ent.label_,ent._.similarity, ent._.semtypes))
+
+    return umls
+
+
+
+# example code
+demo_text1 = '''Admission Date:  [**2573-5-30**]              Discharge Date:   [**2573-7-1**]
+
+Date of Birth:  [**2498-8-19**]             Sex:   F
+
+Service: SURGERY
+
+Allergies:
+Hydrochlorothiazide
+
+Attending:[**First Name3 (LF) 1893**]
+Chief Complaint:
+Abdominal pain
+
+Major Surgical or Invasive Procedure:
+PICC line [**6-25**]
+ERCP w/ sphincterotomy [**5-31**]
+
+
+History of Present Illness:
+74y female with type 2 dm and a recent stroke affecting her
+speech, who presents with 2 days of abdominal pain. Imaging sh'''
+get_word_tokenization(demo_text1)
+
+demo_text2 = '''Past Medical History: 
+pt has history of medical events
+Comments: some comment here
+
+Allergies: apple, seafood
+peanuts
+'''
+get_section_detection(demo_text2)
+
+
+
+demo_text3 = 'Decreased dipalmitoyllecithin content found in lung specimens'
+get_UMLS_match(demo_text3)
+
+