Switch to unified view

a b/QueryExtraction/yake_test_query.py
1
'''
YAKE: https://github.com/LIAAD/yake

YAKE! is a lightweight, unsupervised automatic keyword extraction method that relies on statistical text features extracted from a single document to select the most important keywords of a text.
The YAKE authors compare it against ten state-of-the-art unsupervised approaches (TF.IDF, KP-Miner, RAKE, TextRank, SingleRank, ExpandRank, TopicRank, TopicalPageRank, PositionRank and MultipartiteRank) and one supervised method (KEA).
'''
7
8
9
import yake
10
11
12
language = "en"
13
max_ngram_size = 3
14
deduplication_thresold = 0.9
15
deduplication_algo = 'seqm'
16
windowSize = 1
17
18
19
def yake_extract(text,topk=30):
20
21
    custom_kw_extractor = yake.KeywordExtractor(lan=language, n=max_ngram_size, dedupLim=deduplication_thresold, dedupFunc=deduplication_algo, windowsSize=windowSize, top=topk, features=None)
22
    keywords = custom_kw_extractor.extract_keywords(text)
23
24
    results = []
25
    for kw in keywords:
26
        results.append(kw[0])
27
28
    return results
29