[2d4573]: / extractiveSummarization / scripts / bert_summaries.py

Download this file

57 lines (45 with data), 1.9 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import sys, os
import glob
import shutil
from pathlib import Path
import time
import argparse
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# print(sys.path)
from summarizer import Summarizer
# Command-line interface: where to read the documents from, where to write
# the summaries, and how much of each document to keep.
parser = argparse.ArgumentParser()
parser.add_argument('--source', action='store', metavar='dir_path', type=str, help='directory path containing source', required=True)
parser.add_argument('--saveto', action='store', metavar='dir_path', type=str, help='directory path to store produced summaries', required=True)
# NOTE(review): --threshold is parsed but not read anywhere in the visible
# script; it is kept so existing command lines keep working.
parser.add_argument('--threshold', action='store', type=float, help="default 0.03")
# The ratio is a fraction (documented default 0.05), so it must be parsed as a
# float; with type=int argparse rejected values such as 0.05 outright.
parser.add_argument('--ratio', action='store', type=float, help='ratio of summary size to source size (based on number of sentences). default 0.05')
parser.add_argument('--n', action='store', metavar='ndocs', type=int, help='number of documents to produce summaries of. default 2.')
args = parser.parse_args()

source_dir = args.source
saveto_dir = args.saveto
# `or` applies the default both when the option is omitted and when an
# explicit 0 is given.
ratio = args.ratio or 0.05
ndocs = args.n or 2

# Both directories must already exist; nothing is created here.
# sys.exit(message) writes the message to stderr and exits with status 1, so
# callers and shell pipelines can detect the failure (a bare sys.exit()
# reported success).
if not os.path.isdir(source_dir):
    sys.exit('The source directory specified does not exist')
if not os.path.isdir(saveto_dir):
    sys.exit('The saveto directory specified does not exist')
# Summarize up to `ndocs` "*.src" files found in source_dir, write each
# summary to saveto_dir as "<stem>.sum", then print the run parameters and
# the elapsed wall-clock time.
start = time.time()  # the timer deliberately includes model construction
documents = {}  # source text of each processed document, keyed by file stem
model = Summarizer()
processed = 0
# glob returns paths in arbitrary order; sorting makes the choice of which
# documents get summarized reproducible when the run is limited by `ndocs`.
for filepath in sorted(glob.glob(os.path.join(source_dir, '*.src'))):
    fname = Path(filepath).stem
    # Read and close the source file before the (slow) model call.
    with open(filepath) as fp:
        documents[fname] = fp.read()
    summary = model(documents[fname], ratio=ratio)
    # Presumably `summary` is already a single string, in which case the join
    # is a no-op (TODO confirm against Summarizer). Newlines are stripped so
    # the stored summary is one line.
    joined_summary = ''.join(summary).replace('\n', '')
    summary_path = os.path.join(saveto_dir, fname + ".sum")
    with open(summary_path, 'w') as summary_file:
        summary_file.write(joined_summary)
    processed += 1
    if processed == ndocs:
        break
end = time.time()
print("----Bert summary----")
# Report how many documents were actually summarized, which can be fewer than
# the requested `ndocs` when the source directory holds fewer "*.src" files.
print("Num docs: " + str(processed) + " sentence ratio: " + str(ratio))
print("Runtime " + str(end - start))