Diff of /generateTurkTasks.py [000000] .. [8d2107]

Switch to unified view

a b/generateTurkTasks.py
1
import csv
2
import itertools
3
import re
4
5
from loader import get_patient_by_EMPI
6
from extract_data import get_ef_value_notes
7
from shared_values import get_supplemental_list
8
9
# Regular expressions whose matches get highlighted inside each note.
# Raw strings hand the backslash escapes to the regex engine unchanged.
keywords = [r'(?:ef|ejection fraction)\s*(?:of|is)?[:\s]*([0-9]*\.?[0-9]*)\s*%']

# Compiled once here rather than once per note inside the loops below.
keyword_patterns = [re.compile(keyword) for keyword in keywords]

# Number of consecutive patients written to each output CSV file.
PATIENTS_PER_FILE = 20

# Output files are named <OUTPUT_PREFIX><batch index>.csv.
OUTPUT_PREFIX = "/home/ubuntu/www/turkTasks_"


def _wrap_match(match):
    """Return the matched text wrapped in a highlight <span>."""
    return "<span class='highlight'>" + match.group(0) + "</span>"


def _highlight_keywords(note):
    """Return ``note`` with every keyword match wrapped in a highlight span.

    The patterns are applied in order, each one to the output of the
    previous one.
    """
    for pattern in keyword_patterns:
        note = pattern.sub(_wrap_match, note)
    return note


allpatients = get_supplemental_list()

# Grouping the enumerated patients on index // PATIENTS_PER_FILE yields
# consecutive batches; ``key`` is the batch index used in the file name.
for key, patients in itertools.groupby(enumerate(allpatients),
                                       lambda k: k[0] // PATIENTS_PER_FILE):
    filename = OUTPUT_PREFIX + str(key) + ".csv"
    # Single-argument parenthesised print behaves the same under the
    # Python 2 print statement this script was written for.
    print("Working on: " + filename)
    rows = []
    for (_, patient) in patients:
        print(patient)
        patient_data = get_patient_by_EMPI(patient)
        efnotes = get_ef_value_notes(patient_data)
        for (_, ef_value, note) in efnotes:
            # The note id is the fourth '|'-separated field on the note's
            # second line.  It must be read before the line breaks are
            # rewritten below.
            note_id = note.split('\n')[1].split('|')[3]

            # Change new lines to HTML <br>.
            note = note.replace("\r\n", "<br>")

            # NOTE(review): the note text is embedded as HTML without
            # escaping; confirm the notes cannot contain '<' or '&'.
            rows.append((_highlight_keywords(note), ef_value, patient, note_id))

    # 'wb' is the mode the Python 2 csv module expects for writers.
    with open(filename, 'wb') as csvfile:
        csvwriter = csv.writer(csvfile)
        csvwriter.writerow(['image1', 'guess', 'empi', 'note_id'])
        csvwriter.writerows(rows)