NLP_CRT / Git / Diff of /generateTurkTasks.py

Models:
philipB/
NLP_CRT
Downloads: 1
Diff of /generateTurkTasks.py [000000] .. [8d2107]
Switch to side-by-side view

--- a
+++ b/generateTurkTasks.py
@@ -0,0 +1,56 @@
+import csv
+import itertools
+import re
+
+from loader import get_patient_by_EMPI
+from extract_data import get_ef_value_notes
+from shared_values import get_supplemental_list
+
+# Build the turkTasks_<n>.csv files: one file per batch of patients, one row
+# per note returned by get_ef_value_notes, with every keyword match wrapped in
+# a highlight <span>.
+
+# Number of patients whose notes go into each output file.
+BATCH_SIZE = 20
+# Output files are named OUTPUT_PREFIX + <batch index> + ".csv".
+OUTPUT_PREFIX = "/home/ubuntu/www/turkTasks_"
+# Replacement template for a keyword match; \g<0> is the whole matched text.
+HIGHLIGHT_TEMPLATE = r"<span class='highlight'>\g<0></span>"
+
+# Raw string so the regex escapes (\s, \.) reach the re module untouched.
+keywords = [r'(?:ef|ejection fraction)\s*(?:of|is)?[:\s]*([0-9]*\.?[0-9]*)\s*%']
+# Compiled once here instead of once per note inside the loops below.
+keyword_patterns = [re.compile(keyword) for keyword in keywords]
+
+allpatients = get_supplemental_list()
+# Floor-dividing each patient's position by BATCH_SIZE gives consecutive
+# patients the same key, so groupby yields runs of up to BATCH_SIZE patients.
+for key, patients in itertools.groupby(enumerate(allpatients),
+                                       lambda k: k[0] // BATCH_SIZE):
+    filename = OUTPUT_PREFIX + str(key) + ".csv"
+    print "Working on: " + filename
+    rows = []
+    for (_, patient) in patients:
+        print patient
+        patient_data = get_patient_by_EMPI(patient)
+        efnotes = get_ef_value_notes(patient_data)
+        for (_, ef_value, note) in efnotes:
+            # The id is the 4th '|'-separated field on the note's second line.
+            # Read it before the line breaks are rewritten below.
+            note_id = note.split('\n')[1].split('|')[3]
+
+            # change new line to html br
+            note = note.replace("\r\n", "<br>")
+
+            # wrap every keyword match in the highlight span
+            for pattern in keyword_patterns:
+                note = pattern.sub(HIGHLIGHT_TEMPLATE, note)
+
+            rows.append((note, ef_value, patient, note_id))
+
+    # Binary mode: this is Python 2 code (print statements), where the csv
+    # module expects files opened with 'b'.
+    with open(filename, 'wb') as csvfile:
+        csvwriter = csv.writer(csvfile)
+        csvwriter.writerow(['image1', 'guess', 'empi', 'note_id'])
+        csvwriter.writerows(rows)