|
a |
|
b/wrapper_functions/utils.py |
|
|
1 |
from PyRuSH import RuSH |
|
|
2 |
import stanza |
|
|
3 |
import scispacy |
|
|
4 |
import spacy |
|
|
5 |
|
|
|
6 |
def get_sents_pyrush(text, rules_path='conf/rush_rules.tsv'):
    """Segment *text* into sentence spans using PyRuSH.

    Args:
        text: The raw document text to segment.
        rules_path: Path to the RuSH rule file (TSV). Defaults to the
            project's bundled ``conf/rush_rules.tsv`` for backward
            compatibility.

    Returns:
        A list of PyRuSH sentence span objects (offsets into *text*),
        not the sentence strings themselves.
    """
    print("Segment into sentences using PyRuSH")
    rush = RuSH(rules_path)
    sentences = rush.segToSentenceSpans(text)
    return sentences
|
|
11 |
|
|
|
12 |
def get_sents_stanza(text):
    """Segment *text* into sentences using the Stanza English tokenizer.

    The English model download and `stanza.Pipeline` construction are
    expensive (network check + model load), so they are performed only on
    the first call and cached on the function object; subsequent calls
    reuse the same pipeline.

    Args:
        text: The raw document text to segment.

    Returns:
        A list of sentence strings.
    """
    if not hasattr(get_sents_stanza, "_nlp"):
        # First call only: fetch the English model (no-op if already
        # present on disk) and build the tokenize-only pipeline once.
        stanza.download('en')
        get_sents_stanza._nlp = stanza.Pipeline(lang='en', processors='tokenize')
    nlp = get_sents_stanza._nlp
    return [sentence.text for sentence in nlp(text).sentences]
|
|
17 |
|
|
|
18 |
def get_multiple_sents_stanza(texts):
    """Segment each document in *texts* into sentences using Stanza.

    The English model download and `stanza.Pipeline` construction are
    expensive (network check + model load), so they are performed only on
    the first call and cached on the function object; subsequent calls
    reuse the same pipeline.

    Args:
        texts: An iterable of raw document strings.

    Returns:
        A list with one entry per document, each entry being the list of
        sentence strings for that document.
    """
    if not hasattr(get_multiple_sents_stanza, "_nlp"):
        # First call only: fetch the English model (no-op if already
        # present on disk) and build the tokenize-only pipeline once.
        stanza.download('en')
        get_multiple_sents_stanza._nlp = stanza.Pipeline(lang='en', processors='tokenize')
    nlp = get_multiple_sents_stanza._nlp
    return [[sentence.text for sentence in nlp(text).sentences] for text in texts]
|
|
23 |
|
|
|
24 |
def get_sents_scispacy(text):
    """Segment *text* into sentences using the scispaCy small model.

    `spacy.load("en_core_sci_sm")` reads the whole model from disk, so it
    is performed only on the first call and cached on the function object;
    subsequent calls reuse the loaded pipeline.

    Args:
        text: The raw document text to segment.

    Returns:
        A list of sentence strings.
    """
    if not hasattr(get_sents_scispacy, "_nlp"):
        # Load the scispaCy model once; this is the expensive step.
        get_sents_scispacy._nlp = spacy.load("en_core_sci_sm")
    doc = get_sents_scispacy._nlp(text)
    return [sentence.text for sentence in doc.sents]