|
a |
|
b/util/get_sentiment_labels.py |
|
|
1 |
import os |
|
|
2 |
from glob import glob |
|
|
3 |
import json |
|
|
4 |
|
|
|
5 |
# Build a {sentence text: sentiment label} mapping from the ZuCo task1-SR
# sentiment CSV and write it out as a JSON file.
print('##############################')
print('start generating ZuCo task1-SR sentiment labels...')

# open(), os.path.exists() and os.makedirs() do not expand '~' themselves;
# without expanduser() the script would look for (and create) a directory
# literally named '~' relative to the current working directory.
sentiment_labels_task1_csv_path = os.path.expanduser(
    '~/datasets/ZuCo/task_materials/sentiment_labels_task1.csv'
)

sentiment_labels = {}
with open(sentiment_labels_task1_csv_path, 'r') as f:
    for line in f:
        # Skip the header row, comment rows, and blank lines (a blank line
        # would otherwise raise IndexError on parsed_line[1] below).
        if not line.strip() or line.startswith(('sentence_id', '#')):
            continue

        # Fields are ';'-separated: the sentence is field 1 and the label
        # is the last field.
        parsed_line = line.split(';')

        # Edge case: a double-quoted sentence may itself contain ';', which
        # would break the plain split, so take the text between the first
        # '"' and the closing '";' instead.
        if '";' in line:
            sent_text = line.split('";')[0].split('"')[1]
        else:
            sent_text = parsed_line[1]

        label = int(parsed_line[-1].strip())
        sentiment_labels[sent_text] = label

output_dir = os.path.expanduser('~/datasets/ZuCo/task1-SR/sentiment_labels')
# exist_ok=True avoids the check-then-create race of a separate exists() test.
os.makedirs(output_dir, exist_ok=True)

with open(os.path.join(output_dir, 'sentiment_labels.json'), 'w') as out:
    json.dump(sentiment_labels, out, indent=4)
print('write to ~/datasets/ZuCo/task1-SR/sentiment_labels/sentiment_labels.json')
|
|
33 |
|