Switch to unified view

a b/QueryExtraction/spacy_test_query.py
1
'''
2
test
3
'''
4
5
import json
6
import spacy
7
import pytextrank
8
from collections import defaultdict
9
10
11
12
13
# Load the small English spaCy model once; the pipeline is shared module-wide.
nlp = spacy.load("en_core_web_sm")

# Register PyTextRank as the final stage of the spaCy pipeline.
tr = pytextrank.TextRank()
nlp.add_pipe(tr.PipelineComponent, name='textrank', last=True)
20
21
22
# method
23
def pytextrank_extract(free_text, topk=30):
    """Return the highest-ranked phrases found in ``free_text``.

    The text is run through the module-level ``nlp`` pipeline and the
    phrases exposed on ``doc._.phrases`` are scored by their ``rank``.
    Phrases whose text is five characters or shorter are ignored, and
    ranks of phrases sharing the same text are summed.

    Args:
        free_text: Raw text to analyse.
        topk: Maximum number of phrases to return (default 30).

    Returns:
        A list of phrase strings ordered by descending accumulated rank,
        truncated to ``topk`` entries.
    """
    phrase_scores = defaultdict(float)

    for phrase in nlp(free_text)._.phrases:
        # Only keep phrases longer than five characters.
        if len(phrase.text) > 5:
            phrase_scores[phrase.text] += phrase.rank

    # sorted() is stable, so phrases with equal scores keep first-seen order.
    ranked_phrases = sorted(phrase_scores, key=phrase_scores.get, reverse=True)
    return ranked_phrases[:topk]
41
42
43
# ordered_query_set = extract(test_free_text)
44
45
46
47
#
48
# # print out