# QueryExtraction/rake_test_query.py
'''
Keyphrase extraction with RAKE.

References:
https://github.com/csurfer/rake-nltk
https://towardsdatascience.com/extracting-keyphrases-from-text-rake-and-gensim-in-python-eefd0fad582f

Paper: Automatic Keyword Extraction from Individual Documents
The method is easily applied to new domains, operates well on multiple types
of documents, and is efficient.
The method is based on frequency.
'''
11
from rake_nltk import Rake
12
13
# Module-level extractor used by rake_extract(). Constructed with no
# arguments, Rake uses NLTK's English stopwords and all punctuation
# characters by default.
r = Rake()
16
17
18
19
# Usage reference for the Rake instance `r`:
#
# Extract keywords from a single text.
# r.extract_keywords_from_text(test_free_text)
#
# Extract keywords from a list of strings, where each string is a sentence.
# r.extract_keywords_from_sentences(<list of sentences>)
#
# Get the keyword phrases ranked from highest to lowest.
# r.get_ranked_phrases()
#
# Get the keyword phrases ranked from highest to lowest, with their scores.
# print (r.get_ranked_phrases_with_scores())
30
31
32
def rake_extract(test_free_text, topk=30):
    """Return the leading ranked key phrases extracted from a text.

    The text is passed to the module-level ``Rake`` instance ``r`` and the
    phrase ranking it produces is cut down to its first ``topk`` entries.

    Args:
        test_free_text: Free text to extract key phrases from.
        topk: Upper slice bound applied to the ranked phrases (default 30).

    Returns:
        The first ``topk`` items of ``r.get_ranked_phrases()``, which yields
        phrases ranked from highest to lowest.
    """
    # NOTE(review): `r` is shared module-level state; presumably each call
    # overwrites the results of the previous extraction -- confirm before
    # calling this from concurrent code.
    r.extract_keywords_from_text(test_free_text)
    return r.get_ranked_phrases()[:topk]
37
38