[2d4573]: / wrapper_functions / utils.py

Download this file

28 lines (24 with data), 857 Bytes

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import functools

import scispacy  # noqa: F401  (not referenced directly in this section; kept from the original imports)
import spacy
import stanza
from PyRuSH import RuSH
@functools.lru_cache(maxsize=None)
def _pyrush_segmenter(rules_path):
    """Build the RuSH segmenter for *rules_path* once and reuse it afterwards."""
    return RuSH(rules_path)


def get_sents_pyrush(text, rules_path='conf/rush_rules.tsv'):
    """Segment *text* into sentences with PyRuSH.

    The segmenter is constructed on first use and cached per rule file, so
    repeated calls no longer rebuild it every time.

    Args:
        text: The document to segment.
        rules_path: Path handed to ``RuSH``. The default is relative, so it
            is resolved against the current working directory the first
            time the segmenter is built.

    Returns:
        Whatever ``RuSH.segToSentenceSpans`` returns for *text*. Unlike the
        stanza/scispacy helpers in this module, which return sentence
        strings, this is the raw span result -- presumably objects carrying
        begin/end character offsets into *text*; confirm against PyRuSH.
    """
    print("Segment into sentences using PyRuSH")
    return _pyrush_segmenter(rules_path).segToSentenceSpans(text)
@functools.lru_cache(maxsize=None)
def _stanza_tokenizer(lang):
    """Download the *lang* models and build the tokenize-only pipeline once.

    Cached per language so the download check and the pipeline construction
    are not repeated on every segmentation call.
    """
    stanza.download(lang)
    return stanza.Pipeline(lang=lang, processors='tokenize')


def get_sents_stanza(text, lang='en'):
    """Split *text* into sentences with stanza's tokenizer.

    Args:
        text: The document to segment.
        lang: Language code passed to ``stanza.download`` and
            ``stanza.Pipeline``; defaults to English.

    Returns:
        A list with the text of each sentence. Empty (falsy) input yields
        ``[]`` without downloading or loading any model.
    """
    if not text:
        return []
    nlp = _stanza_tokenizer(lang)
    return [sentence.text for sentence in nlp(text).sentences]


def get_multiple_sents_stanza(texts, lang='en'):
    """Split every document in *texts* into sentences with stanza.

    Args:
        texts: Iterable of documents to segment.
        lang: Language code used for all documents; defaults to English.

    Returns:
        A list with one list of sentence strings per input document, in
        input order. The pipeline is built at most once (and not at all if
        every document is empty).
    """
    return [get_sents_stanza(text, lang) for text in texts]
@functools.lru_cache(maxsize=None)
def _load_spacy_model(model):
    """Load the spaCy model *model* once and reuse it on later calls."""
    return spacy.load(model)


def get_sents_scispacy(text, model="en_core_sci_sm"):
    """Split *text* into sentences with a scispaCy model.

    Args:
        text: The document to segment.
        model: Name passed to ``spacy.load``; defaults to the small
            scientific English model the module originally hard-coded.

    Returns:
        A list with the text of each sentence found in *text*. Empty
        (falsy) input yields ``[]`` without loading the model.
    """
    if not text:
        return []
    doc = _load_spacy_model(model)(text)
    return [sentence.text for sentence in doc.sents]