Switch to unified view

a b/wrapper_functions/utils.py
1
from PyRuSH import RuSH
2
import stanza
3
import scispacy
4
import spacy
5
6
def get_sents_pyrush(text):
    """Segment *text* with PyRuSH using the rules in ``conf/rush_rules.tsv``.

    A progress message is printed before the segmenter is built. The rules
    path is relative, so it is resolved against the current working
    directory at call time.

    Args:
        text: The raw text to segment.

    Returns:
        Whatever ``RuSH.segToSentenceSpans`` returns for *text* (presumably
        sentence span objects rather than plain strings -- confirm against
        the PyRuSH documentation before relying on the element type).
    """
    print("Segment into sentences using PyRuSH")
    segmenter = RuSH('conf/rush_rules.tsv')
    return segmenter.segToSentenceSpans(text)
11
12
# Lazily-built stanza pipeline shared by the stanza helpers below, so the
# resource download check and model load happen once per process instead of
# on every call.
_STANZA_PIPELINE = None


def _get_stanza_pipeline():
    """Return the shared English tokenize-only stanza pipeline.

    The first call downloads the English resources (``stanza.download('en')``)
    and constructs the pipeline; later calls reuse the same object.
    """
    global _STANZA_PIPELINE
    if _STANZA_PIPELINE is None:
        stanza.download('en')
        _STANZA_PIPELINE = stanza.Pipeline(lang='en', processors='tokenize')
    return _STANZA_PIPELINE


def get_sents_stanza(text):
    """Split *text* into sentences with stanza's English tokenizer.

    Args:
        text: The raw text to segment.

    Returns:
        A list with the ``.text`` of each sentence stanza found, in document
        order.
    """
    nlp = _get_stanza_pipeline()
    return [sentence.text for sentence in nlp(text).sentences]


def get_multiple_sents_stanza(texts):
    """Split each text in *texts* into sentences with stanza.

    Args:
        texts: An iterable of raw texts to segment.

    Returns:
        A list with one entry per input text, each entry being the list of
        sentence ``.text`` values for that text, in document order.
    """
    nlp = _get_stanza_pipeline()
    return [
        [sentence.text for sentence in nlp(text).sentences]
        for text in texts
    ]
23
24
# Lazily-loaded scispaCy model; loading it from disk is expensive, so it is
# kept for the lifetime of the process after the first call.
_SCISPACY_NLP = None


def get_sents_scispacy(text):
    """Split *text* into sentences with the ``en_core_sci_sm`` spaCy model.

    The model is loaded on the first call and reused afterwards instead of
    being reloaded for every text.

    Args:
        text: The raw text to segment.

    Returns:
        A list with the ``.text`` of each sentence in ``doc.sents``, in
        document order.
    """
    global _SCISPACY_NLP
    if _SCISPACY_NLP is None:
        _SCISPACY_NLP = spacy.load("en_core_sci_sm")
    doc = _SCISPACY_NLP(text)
    return [sentence.text for sentence in doc.sents]