|
a |
|
b/generateTurkTasks.py |
|
|
1 |
import csv |
|
|
2 |
import itertools |
|
|
3 |
import re |
|
|
4 |
|
|
|
5 |
from loader import get_patient_by_EMPI |
|
|
6 |
from extract_data import get_ef_value_notes |
|
|
7 |
from shared_values import get_supplemental_list |
|
|
8 |
|
|
|
9 |
# Regular expressions whose matches are highlighted in every note. The single
# capture group is the number that precedes the percent sign.
# NOTE(review): the pattern is case-sensitive, so an upper-case "EF" is not
# highlighted -- confirm that this is intended.
keywords = [r'(?:ef|ejection fraction)\s*(?:of|is)?[:\s]*([0-9]*\.?[0-9]*)\s*%']

# Compile once up front instead of once per note inside the loops below.
_keyword_patterns = [re.compile(keyword) for keyword in keywords]


def _highlight(match):
    """Return the matched text wrapped in a ``highlight`` span."""
    return "<span class='highlight'>" + match.group(0) + "</span>"


allpatients = get_supplemental_list()

# Process the patients in consecutive groups of 20 (index // 20 is the group
# key); each group is written to its own CSV file.
for key, patients in itertools.groupby(enumerate(allpatients),
                                       lambda k: k[0] // 20):
    filename = "/home/ubuntu/www/turkTasks_" + str(key) + ".csv"
    print("Working on: " + filename)
    rows = []
    for (_, patient) in patients:
        print(patient)
        patient_data = get_patient_by_EMPI(patient)
        efnotes = get_ef_value_notes(patient_data)
        for (_, ef_value, note) in efnotes:
            # Fourth pipe-delimited field of the note's second line.
            # Must be read before the line breaks are rewritten below.
            # NOTE(review): presumably the note identifier -- confirm against
            # the note format.
            note_id = note.split('\n')[1].split('|')[3]

            # change new line to html br
            note = note.replace("\r\n", "<br>")

            # bold found matches: wrap every keyword match in a highlight
            # span. Patterns are applied one after another, so a later
            # pattern sees the markup inserted by an earlier one.
            for pattern in _keyword_patterns:
                note = pattern.sub(_highlight, note)

            rows.append((note, ef_value, patient, note_id))

    # Binary mode is what the Python 2 csv module expects for output files.
    with open(filename, 'wb') as csvfile:
        csvwriter = csv.writer(csvfile)
        csvwriter.writerow(['image1', 'guess', 'empi', 'note_id'])
        csvwriter.writerows(rows)