"""Sentence-segmentation helpers built on PyRuSH, Stanza and spaCy.

Each public ``get_*`` function takes raw text and returns its sentences.
The underlying segmenters/pipelines are expensive to construct, so they
are created lazily on first use and then reused for the rest of the
process instead of being rebuilt on every call.
"""

from functools import lru_cache

from PyRuSH import RuSH
import stanza
import scispacy  # noqa: F401 -- unused by name here; presumably kept for scispaCy model support (confirm)
import spacy


@lru_cache(maxsize=None)
def _load_rush():
    """Return the shared RuSH segmenter, constructing it on first use.

    NOTE: the rules path is relative, so it is resolved against the working
    directory in effect at the time of the first call.
    """
    return RuSH('conf/rush_rules.tsv')


@lru_cache(maxsize=None)
def _load_stanza_tokenizer():
    """Return the shared Stanza tokenize-only English pipeline.

    ``stanza.download('en')`` is invoked once, right before the pipeline is
    first built, rather than on every segmentation call.
    """
    stanza.download('en')
    return stanza.Pipeline(lang='en', processors='tokenize')


@lru_cache(maxsize=None)
def _load_scispacy():
    """Return the shared spaCy pipeline loaded from ``en_core_sci_sm``."""
    return spacy.load("en_core_sci_sm")


def get_sents_pyrush(text):
    """Segment ``text`` with PyRuSH.

    Args:
        text: The text to segment.

    Returns:
        Whatever ``RuSH.segToSentenceSpans`` returns for ``text``. Unlike the
        other helpers in this module, the result is passed through as-is
        rather than converted to sentence strings (presumably span objects
        carrying offsets -- confirm against the PyRuSH documentation).
    """
    print("Segment into sentences using PyRuSH")
    return _load_rush().segToSentenceSpans(text)


def get_sents_stanza(text):
    """Segment ``text`` with Stanza's tokenizer.

    Args:
        text: The text to segment.

    Returns:
        A list with the ``.text`` of each sentence Stanza found.
    """
    nlp = _load_stanza_tokenizer()
    return [sentence.text for sentence in nlp(text).sentences]


def get_multiple_sents_stanza(texts):
    """Segment several texts with Stanza's tokenizer.

    Args:
        texts: An iterable of texts to segment.

    Returns:
        A list with one entry per input text, each entry being the list of
        that text's sentence strings (same order as ``texts``).
    """
    nlp = _load_stanza_tokenizer()
    return [
        [sentence.text for sentence in nlp(text).sentences]
        for text in texts
    ]


def get_sents_scispacy(text):
    """Segment ``text`` with the ``en_core_sci_sm`` spaCy pipeline.

    Args:
        text: The text to segment.

    Returns:
        A list with the ``.text`` of each sentence in the parsed document.
    """
    doc = _load_scispacy()(text)
    return [sentence.text for sentence in doc.sents]