|
a |
|
b/sandbox.py |
|
|
1 |
from loader import * |
|
|
2 |
from language_processing import * |
|
|
3 |
from extract_data import * |
|
|
4 |
from build_graphs import * |
|
|
5 |
|
|
|
6 |
while raw_input("Do you want to perform a search? (y/n) ").lower() == 'y': |
|
|
7 |
keywords = [] |
|
|
8 |
keyword = "a" |
|
|
9 |
while len(keyword) > 0: |
|
|
10 |
keyword = raw_input("Enter search: ") |
|
|
11 |
by_patient = [] |
|
|
12 |
for i in range(50): |
|
|
13 |
data = get_data([i])[0] |
|
|
14 |
by_patient += [get_doc_keywords(data, keywords, None, True)] |
|
|
15 |
print by_patient |
|
|
16 |
|
|
|
17 |
|
|
|
18 |
# Interactive plotting loop: while the user keeps answering 'y', read a lower
# and an upper bound and hand range(lower, upper) to plot_num_docs. The upper
# bound itself is excluded by range().
while True:
    answer = raw_input("Do you want to generate plots? (y/n) ")
    if answer.lower() != 'y':
        break
    first = int(raw_input("Lower bound on range (0-912): "))
    stop = int(raw_input("Upper bound on range (1-913): "))
    plot_num_docs(range(first, stop))
|
|
22 |
|
|
|
23 |
if raw_input("Do you want to test the document date fields? (y/n) ").lower() == 'y': |
|
|
24 |
doc_dates = dict() |
|
|
25 |
for i in range(50): |
|
|
26 |
data = get_data([i])[0] |
|
|
27 |
for doc_type in data: |
|
|
28 |
if data[doc_type] != None: |
|
|
29 |
doc = data[doc_type] |
|
|
30 |
if type(doc) != type(list()): |
|
|
31 |
doc = [doc] |
|
|
32 |
if len(doc) > 0: |
|
|
33 |
doc = doc[0] |
|
|
34 |
for label in doc: |
|
|
35 |
if "date" in label.lower(): |
|
|
36 |
if doc_type in doc_dates: |
|
|
37 |
doc_dates[doc_type].add(label) |
|
|
38 |
else: |
|
|
39 |
doc_dates[doc_type] = set([label]) |
|
|
40 |
print doc_dates |
|
|
41 |
|
|
|
42 |
if raw_input("Do you want to test operation date extractor? (y/n) ").lower() == 'y': |
|
|
43 |
#Test the procedure date extractor |
|
|
44 |
for i in range(1000): |
|
|
45 |
data = get_data([i])[0] |
|
|
46 |
print get_operation_date(data) |
|
|
47 |
|
|
|
48 |
print "===================================================" |
|
|
49 |
|
|
|
50 |
#Test the file extractor |
|
|
51 |
if raw_input("Do you want to test parse_note_header? (y/n) ").lower() == 'y': |
|
|
52 |
for i in range(50): |
|
|
53 |
data = get_data([i])[0] |
|
|
54 |
clean_data(data) |
|
|
55 |
for tag in data: |
|
|
56 |
data_tag = data[tag] |
|
|
57 |
for doc in data_tag: |
|
|
58 |
parsed = parse_note_header(doc, tag) |
|
|
59 |
if len(parsed) > 0: |
|
|
60 |
print tag, parsed |
|
|
61 |
|
|
|
62 |
#Explore a patient's data
# Repeatedly ask for a patient number, load and clean that patient's record,
# and hand it to explore() until the user declines to continue.
again = True
maxkey = 900
while again:
    # Re-prompt until the entry is a whole number in [0, maxkey).
    # str.isdigit() is used instead of unicode(...).isnumeric(): raw_input
    # returns a byte string, and unicode() on non-ASCII input would raise
    # UnicodeDecodeError rather than simply re-prompting.
    num = ""
    while not (num.isdigit() and int(num) < maxkey):
        # The largest accepted value is maxkey - 1, so advertise that bound.
        num = raw_input("Select a patient number from 0 - " + str(maxkey - 1) + ": ")
    num = int(num)

    # Copy the record into a fresh dict before cleaning it.
    patient = dict(get_data([num])[0])
    clean_data(patient)

    explore(patient)
    inp = raw_input("Look at another patient? (y/n): ")
    # A bare number also counts as "yes"; the number itself is not reused,
    # the selection prompt above is shown again.
    again = inp.lower() == "y" or inp.isdigit()
|
|
76 |
|
|
|
77 |
|
|
|
78 |
s = "Hello person. I weigh 15.6kg. How about you?I think you are 20.Correct?" |
|
|
79 |
print split_sentences(s) |
|
|
80 |
|
|
|
81 |
#See the entire bag of words for the 'Lno' field across all patients
# This section is disabled: it sits inside a bare triple-quoted string, so it
# is evaluated as a string expression and never executed.
# NOTE(review): the code below indexes `data` by data.keys()[i] and treats each
# value as one person's record; presumably `data` was once a mapping of all
# patients -- confirm and rebuild that mapping before re-enabling.
'''
if raw_input("Enter 'y' to see bag of words test:") == "y":
    print
    x = raw_input("This is about to output the entire bag of words for all patients\nin their Lno file. Press enter to continue")
    print
    tag = u'Lno'
    bag = bag_of_words(data[data.keys()[0]][tag])
    for i in range(1, len(data.keys())):
        person = data[data.keys()[i]]
        if tag in person:
            bag = bag_of_words(person[tag], bag)

    tuple_bag = [(str(k), bag[k]) for k in bag]
    print sorted(tuple_bag, key = lambda k: k[1], reverse = True)

'''