|
a |
|
b/loader.py |
|
|
1 |
import json |
|
|
2 |
import os |
|
|
3 |
|
|
|
4 |
# Directory prefix (with trailing slash) that the loader functions in this
# module prepend to patient JSON file names.
patients_path = '/PHShome/ju601/crt/data/json/'
# Alternate data directory, currently disabled:
#patients_path = '/PHShome/ju601/crt/data/MGHpallcare/json/'
|
|
6 |
|
|
|
7 |
# Deprecated
def get_old_data():
    """Load the old anonymized data set from its JSON file.

    Reads 'data/anon_data_OLD.json', resolved against the current working
    directory, and returns the parsed JSON content (a nested dictionary,
    according to the original note on this function).
    """
    with open('data/anon_data_OLD.json', 'r') as old_file:
        return json.load(old_file)
|
|
13 |
|
|
|
14 |
def get_patient_by_EMPI(empi):
    """Get a specific patient by their EMPI.

    The record is read from '<patients_path><empi>.json'.

    Args:
        empi: EMPI as a string; it is concatenated directly into the path.

    Returns:
        The parsed JSON content of the patient's file.

    Raises:
        ValueError: if no file exists for that EMPI.
    """
    record_path = patients_path + empi + '.json'
    if not os.path.isfile(record_path):
        raise ValueError("Patient with EMPI: " + empi + " does not exist")
    with open(record_path, 'r') as record_file:
        return json.load(record_file)
|
|
23 |
|
|
|
24 |
def save(patient):
    """Write a patient record back to its existing JSON file.

    The target is '<patients_path><EMPI>.json', where the EMPI is taken from
    patient['EMPI']. Only an already-existing file is overwritten; this
    function never creates a new patient file.

    Args:
        patient: dict with at least an 'EMPI' string entry; the whole dict is
            serialized as JSON.

    Returns:
        The same patient object that was passed in.

    Raises:
        ValueError: if no file exists for the patient's EMPI.
    """
    empi = patient['EMPI']
    file_path = patients_path + empi + '.json'
    if not os.path.isfile(file_path):
        # The error message previously used an undefined name, so a missing
        # file surfaced as NameError instead of this ValueError.
        raise ValueError("Patient with EMPI: " + empi + " does not exist")
    with open(file_path, 'w') as f:
        json.dump(patient, f)
    return patient
|
|
32 |
|
|
|
33 |
def get_data(patient_range=range(0,10)):
    """Load a slice of patients, ordered by the number in their file name.

    Files in patients_path named '<number>.json' are sorted numerically and
    the ones at the positions listed in patient_range are parsed.

    Args:
        patient_range: positions (into the numerically sorted file list) of
            the patients to load. At most 50 positions may be requested.
            Positions at or beyond the number of available files are skipped.

    Returns:
        A list with the parsed JSON content of each selected file, in the
        order given by patient_range.

    Raises:
        ValueError: if more than 50 positions are requested, or if a '.json'
            file name does not start with an integer.
        Exception: whatever json.load raises for a bad file is re-raised
            after the offending path has been printed.
    """
    if len(patient_range) > 50:
        raise ValueError("You've attempted to load too many patients into memory at once.")

    # Only '*.json' entries are patient files. Anything else in the directory
    # (hidden files, notes, sub-directories) used to crash the int() sort key.
    files = [name for name in os.listdir(patients_path) if name.endswith('.json')]
    # Sort by number order
    # Changed for pal care
    #files = sorted(files, key=lambda x: int(x.split('.json')[0].split('_')[2]))
    files.sort(key=lambda name: int(name[:-len('.json')]))
    file_count = len(files)

    # Get all patients in patient range
    patients = []
    for i in patient_range:
        if i < file_count:
            file_path = patients_path + files[i]
            with open(file_path, 'r') as f:
                try:
                    patients.append(json.load(f))
                except Exception:
                    print("Error loading file: " + file_path)
                    # Bare raise keeps the original traceback.
                    raise
    return patients
|
|
56 |
|
|
|
57 |
def get_dummy_non_anonymized_patient():
    """Return a fresh, hard-coded fake patient record.

    The dict carries name fields, an 'EMPI', a 'NEW_EMPI' and a list of
    'MRNS'; a new dict (and a new list) is built on every call.
    """
    fields = [
        ('First_Name', 'Josh'),
        ('Last_Name', 'Haimson'),
        ('EMPI', '1234emPI'),
        ('NEW_EMPI', 'FAKE_EMPI_1'),
        ('MRNS', ['mrn12', 'mrn34']),
    ]
    return dict(fields)
|
|
59 |
|
|
|
60 |
def clean_data(data):
    """Remove empty fields from nested data, in place.

    Recursive; modifies the object and returns nothing. For every dict
    reached, each entry whose value (after its own cleaning) equals '', [],
    {} or None is removed. Lists are only traversed: their elements are
    cleaned, but empty elements are not removed from the list itself.
    Any other value is left untouched.

    Args:
        data: a dict, a list, or any other value.
    """
    if isinstance(data, dict):
        # Snapshot the keys first: entries are popped inside the loop, and
        # mutating a dict while iterating its live key view raises
        # RuntimeError on Python 3.
        for key in list(data):
            clean_data(data[key])
            if data[key] in ('', [], {}, None):
                data.pop(key)
    elif isinstance(data, list):
        for element in data:
            clean_data(element)
|
|
71 |
|
|
|
72 |
def _prompt(message):
    """Read one line from the user, on either Python 2 or Python 3."""
    try:
        reader = raw_input  # Python 2
    except NameError:
        reader = input  # Python 3
    return reader(message)


def _as_index(text, count):
    """Return the zero-based index for a 1-based menu entry, or None if invalid."""
    try:
        number = int(text)
    except ValueError:
        return None
    if 1 <= number <= count:
        return number - 1
    return None


def _explore_dict(data):
    """Show a numbered key menu and descend into the chosen value until the user enters an empty line."""
    while True:
        keys = list(data)
        for position, key in enumerate(keys, 1):
            value = data[key]
            size = ""
            # Only lists with more than one item get their length shown.
            if isinstance(value, list) and len(value) > 1:
                size = " (" + str(len(value)) + ")"
            print("%s.\t%s %s" % (position, key, size))

        # Accept an empty line, a valid menu number, or an exact key name.
        while True:
            choice = _prompt("Select a key: ")
            index = _as_index(choice, len(keys))
            if choice == "" or index is not None or choice in data:
                break
        print("")

        if choice == "":
            return
        # A number that is a valid menu position wins over a key of the same
        # name; a number outside the menu is only accepted as a key name.
        selected = keys[index] if index is not None else choice
        print("%s : " % (selected,))
        explore(data[selected])


def _explore_list(data):
    """Ask for a 1-based position and descend into that item until the user enters an empty line."""
    while True:
        while True:
            choice = _prompt("Select a file from 1-" + str(len(data)) + ": ")
            index = _as_index(choice, len(data))
            if choice == "" or index is not None:
                break
        if choice == "":
            return
        explore(data[index])


def _show_leaf(value):
    """Print a single value and wait for the user to press enter."""
    try:
        text = value.strip("\n")
    except AttributeError:
        # Numbers, booleans, None and empty lists have no strip().
        text = str(value)
    print(text)
    print("")
    _prompt("[ Press enter to continue ]")
    print("")


def explore(data):
    """Little function to help with exploring the data from the command line.

    Interactive: reads choices from standard input and prints to standard
    output.

    * dict: lists the keys with numbers; the user picks one by number or by
      typing the key, and an empty line goes back up one level.
    * list with more than one item: the user picks an item by its 1-based
      position, and an empty line goes back up one level.
    * list with exactly one item: descends straight into that item.
    * anything else (including an empty list): printed, then waits for enter.

    Args:
        data: nested dicts/lists/values to browse, e.g. a loaded patient.
    """
    if isinstance(data, list) and len(data) == 1:
        explore(data[0])
    elif isinstance(data, dict):
        _explore_dict(data)
    elif isinstance(data, list) and data:
        _explore_list(data)
    else:
        _show_leaf(data)
|
|
112 |
print |
|
|
113 |
|
|
|
114 |
|