|
a |
|
b/tables.py |
|
|
1 |
from collections import defaultdict, Counter |
|
|
2 |
from datetime import date, datetime, timedelta |
|
|
3 |
import numpy as np |
|
|
4 |
|
|
|
5 |
from extract_data import get_operation_date, get_ef_values |
|
|
6 |
from language_processing import parse_m_d_y |
|
|
7 |
from loader import get_data |
|
|
8 |
|
|
|
9 |
def get_baseline_lab_value(p, lab_types, procedure_date):
    """Return the first numeric lab result recorded on the procedure date.

    Args:
        p: patient record; ``p['Lab']`` is iterated and each entry is read
            for ``'Test_Description'``, ``'Seq_Date_Time'`` and ``'Result'``.
        lab_types: collection of test descriptions that count as a match.
        procedure_date: ``datetime.date`` that the lab's date must equal.

    Returns:
        The matching result converted to ``float``, or ``None`` when no lab
        of the requested type on that date carries a numeric result.

    Raises:
        ValueError: if a matching lab's ``'Seq_Date_Time'`` is not in
            ``"%m/%d/%Y %H:%M"`` format.
    """
    for lab in p['Lab']:
        if lab['Test_Description'] not in lab_types:
            continue
        # Named lab_date so the imported ``date`` class is not shadowed.
        lab_date = datetime.strptime(lab['Seq_Date_Time'], "%m/%d/%Y %H:%M").date()
        if lab_date == procedure_date:
            try:
                return float(lab['Result'])
            except (KeyError, TypeError, ValueError):
                # Missing or non-numeric result: keep scanning, because a
                # later lab of the same type on this date may be usable.
                continue
    return None
|
|
19 |
|
|
|
20 |
def filter_out_post_procedure(documents, procedure_date, date_key):
    """Keep the documents dated on or before the procedure, oldest first.

    Args:
        documents: iterable of records indexable by ``date_key``.
        procedure_date: date subtracted from each parsed document date.
        date_key: key whose value is handed to ``parse_m_d_y``.
            (presumably an m/d/y date string -- confirm against the parser)

    Returns:
        A list of ``(day_offset, document)`` tuples with ``day_offset <= 0``,
        in ascending ``day_offset`` order. Documents with the same offset
        keep their input order.
    """
    doc_list = []
    for doc in documents:
        p_delta = (parse_m_d_y(doc[date_key]) - procedure_date).days
        if p_delta <= 0:
            doc_list.append((p_delta, doc))
    # Sort on the offset only. Sorting the bare tuples would compare the
    # documents themselves whenever two offsets tie, which is arbitrary on
    # Python 2 and a TypeError for dicts on Python 3.
    doc_list.sort(key=lambda pair: pair[0])
    return doc_list
|
|
28 |
|
|
|
29 |
|
|
|
30 |
def get_n_preprocedure_dia(diagnoses, procedure_date, n):
    """Return up to ``n`` diagnoses closest to (and not after) the procedure.

    Args:
        diagnoses: diagnosis records; each is read through its ``'Date'`` key
            by ``filter_out_post_procedure``.
        procedure_date: cut-off date; later diagnoses are dropped.
        n: maximum number of diagnoses to return.

    Returns:
        A list of at most ``n`` diagnosis records, taken from the end of the
        offset-sorted pre-procedure list. Empty when ``n <= 0``.
    """
    d_list = filter_out_post_procedure(diagnoses, procedure_date, 'Date')
    if n <= 0:
        # d_list[-0:] is the whole list, so zero must be handled explicitly.
        return []
    # A slice longer than the list simply yields the whole list.
    return [doc for _, doc in d_list[-n:]]
|
|
34 |
|
|
|
35 |
class Range(object):
    """Inclusive interval of ICD-9 codes whose ``==`` means "code is inside".

    The overloaded equality lets a Range sit in a plain list next to single
    codes, so that ``code in [410.0, Range(490, 492.8)]`` matches either an
    exact code or any code within the interval.
    """

    def __init__(self, low, high):
        """Store both bounds converted with ``icd9_to_float``."""
        self.low = self.icd9_to_float(low)
        self.high = self.icd9_to_float(high)

    def icd9_to_float(self, icd9):
        """Map a code to a float that orders codes consistently.

        Values accepted by ``float()`` are returned as that float. Otherwise
        the first character is taken as a letter prefix and the rest as the
        numeric part, giving ``ord(letter) * 1000 + number``
        (for example ``'V56.0'`` -> ``86056.0``).

        Raises:
            ValueError: if the part after the first character is not numeric.
            TypeError: if ``icd9`` is not a number or a string.
        """
        try:
            return float(icd9)
        except ValueError:
            extension = float(icd9[1:])
            letter = icd9[0]
            return ord(letter) * 1000 + extension

    def __eq__(self, other):
        """Return True when ``other`` is a code lying within the bounds."""
        try:
            value = self.icd9_to_float(other)
        except (TypeError, ValueError, IndexError):
            # Not an ICD-9 code (None, another Range, malformed text): let
            # Python fall back to its default comparison instead of raising
            # in the middle of a membership test.
            return NotImplemented
        return self.low <= value <= self.high

    def __ne__(self, other):
        """Negation of ``__eq__``."""
        result = self.__eq__(other)
        if result is NotImplemented:
            # Must be passed through untouched; negating it would be wrong.
            return result
        return not result

    def __repr__(self):
        return "Range(%r, %r)" % (self.low, self.high)
|
|
51 |
|
|
|
52 |
# Named groups of CPT procedure codes.
# NOTE(review): the only lookup of this table visible in this file sits
# inside a triple-quoted string in the statistics loop, i.e. it is disabled.
cpt = {
    'crt_out': [33224, 33225, 33226]
}
# Named groups of ICD-9 diagnosis codes, matched with `code in icds[key]`.
# Entries are floats for numeric codes, strings for letter-prefixed codes,
# and Range objects for inclusive intervals.
# Numeric codes are float literals, so spellings that differ only by a
# trailing zero (410.1 and 410.10) are the same value and cannot be told apart.
icds = {
    'crt_in': [00.50, 00.51],
    'ischemic': [410.0, 410.01, 410.02, 410.1, 410.10, 410.11, 410.12, 410.2, 410.20, 410.21, 410.22, 410.3, 410.30, 410.31, 410.32, 410.4, 410.40, 410.41, 410.42, 410.5, 410.50, 410.51, 410.52, 410.6, 410.60, 410.61, 410.62, 410.7, 410.70, 410.71, 410.72, 410.8, 410.80, 410.81, 410.82, 410.9, 410.90, 410.91, 410.92, 411.0, 411.1, 411.8, 411.81, 411.89, 412.0, 413.0, 413.1, 413.9, 414.0, 414.00, 414.01, 414.02, 414.03, 414.04, 414.05, 414.06, 414.07, 414.1, 414.10, 414.11, 414.12, 414.19, 414.2, 414.3, 414.4, 414.8, 414.9],
    'non-ischemic': [425.4],
    'arrhythmia': [427.1, 427.4, 427.41, 427.42, 427.5, 427.9],
    'lbbb': [426.3, 426.2, 426.51, 426.52, 426.53],
    'av_block': [426.0],
    'afib': [427.31],
    'cpd': [Range(490, 492.8), Range(493.00, 493.92), Range(494, 494.1), Range(495.0, 505), 506.4],
    'diabetes': [Range(250.00, 250.33), Range(250.40, 250.93)],
    'renal_disease': [403.01, 403.11, 403.91, 404.02, 404.03, 404.12, 404.13, 404.92, 404.93, 585, 586, 'V42.0', 'V45.1', Range('V56.0', 'V56.2'), 'V56.8']
}
|
|
67 |
|
|
|
68 |
def get_ef_delta(patient_data):
    """Pick a baseline and a follow-up EF reading and return their difference.

    The ``(rel_date, ef_value)`` pairs from ``get_ef_values(patient_data)``
    are walked in sorted order. The baseline is the last pair whose
    ``rel_date`` is <= 0. The follow-up is the pair with ``rel_date`` > 0
    lying closest to 365, the earlier one winning a tie.
    (rel_date is presumably days relative to the procedure -- confirm in
    get_ef_values)

    Returns:
        ``(after - before, before, after, before_date, after_date)``, or a
        tuple of five ``None`` values when either reading is missing.
    """
    target_day = 365
    baseline = (None, None)   # (ef value, relative date)
    followup = (None, None)   # (ef value, relative date)
    best_gap = float('inf')

    for rel_day, ef in sorted(get_ef_values(patient_data)):
        if rel_day <= 0:
            # Sorted input: each overwrite moves the baseline later.
            baseline = (ef, rel_day)
            continue
        gap = abs(rel_day - target_day)
        if gap < best_gap:
            followup = (ef, rel_day)
            best_gap = gap

    before, before_date = baseline
    after, after_date = followup
    if before is None or after is None:
        return (None, None, None, None, None)
    return (after - before, before, after, before_date, after_date)
|
|
91 |
|
|
|
92 |
# Collect statistics
# NOTE(review): the indentation of this section was not recoverable from the
# copy this comment pass was made on. The nesting below is a reconstruction;
# confirm it against the original file, in particular which `if` the
# stats[...] appends belong to. They are placed directly under
# `if procedure_date:` here because the report code further down filters
# None values out of the EF columns, which would otherwise never be present.
has_procedure = 0  # patients for whom get_operation_date(p) is truthy
has_baseline = 0   # ...that also have a truthy baseline EF with relative date > -60
no_baseline = []   # EMPI of every patient with a procedure but a falsy baseline EF
has_followup = 0   # ...whose follow-up relative date is strictly between 100 and 500
stats = defaultdict(list)  # column name -> one value per patient that was kept
total = 1056
# range(total - 1) yields 0 .. total - 2, so index total - 1 is never loaded.
# NOTE(review): confirm that leaving out the last index is intended.
for i in range(total - 1):
    p = get_data([i])[0]
    print str(i) + " - " + p['EMPI']

    procedure_date = get_operation_date(p)
    if procedure_date:
        has_procedure += 1
        (ef_delta, baseline_ef, followup_ef, baseline_date, followup_date) = get_ef_delta(p)
        # Truthiness test: a baseline EF of 0 is treated like a missing one.
        if not baseline_ef:
            no_baseline.append(p['EMPI'])
        if baseline_ef and baseline_date > -60:
            has_baseline += 1
            if followup_date > 100 and followup_date < 500:
                has_followup += 1
        stats['procedure_date'].append(procedure_date)
        stats['baseline_days'].append(baseline_date)
        stats['followup_days'].append(followup_date)
        stats['baseline_lvef'].append(baseline_ef)
        stats['lvef_followup'].append(followup_ef)
        stats['lvef_change'].append(ef_delta)

        stats['sex'].append(p['Gender'])
        # Number of encounters admitted on or before the procedure date.
        stats['n_enc'].append(len(filter_out_post_procedure(p['Enc'], procedure_date, 'Admit_Date')))

        if p['Date_Of_Death']:
            death_date = parse_m_d_y(p['Date_Of_Death'])
            stats['died_in_year'].append((death_date - procedure_date) < timedelta(365))
        else:
            # No recorded death date counts as "did not die within a year".
            stats['died_in_year'].append(False)

        # Only the 76 most recent pre-procedure diagnoses are scanned.
        dia = get_n_preprocedure_dia(p['Dia'], procedure_date, 76)
        icd_present = defaultdict(lambda : False)
        for d in dia:
            if d['Code_Type'] == 'ICD9':
                # Numeric codes are compared as floats, letter-prefixed ones
                # (where float() fails) as the raw string.
                try:
                    code = float(d['Code'])
                except ValueError:
                    code = d['Code']
                for key in icds.keys():
                    if code in icds[key]:
                        icd_present[key] = True
            # The string literal below is a disabled CPT branch; it is
            # evaluated and discarded, so CPT codes are not matched.
            """
            elif d['Code_Type'] == 'CPT':
                try:
                    code = float(d['Code'])
                    for key in cpt.keys():
                        if code in cpt[key]:
                            icd_present[key] = True
                except:
                    pass
            """

        # One True/False per ICD group, so every group column gets a value.
        for key in icds.keys():
            stats[key].append(icd_present[key])

        stats['baseline_creatinine'].append(get_baseline_lab_value(p, ['Plasma Creatinine', 'Creatinine'], procedure_date))
        stats['baseline_sodium'].append(get_baseline_lab_value(p, ['Plasma Sodium'], procedure_date))
        stats['baseline_hgb'].append(get_baseline_lab_value(p, ['HGB'], procedure_date))
|
|
158 |
|
|
|
159 |
# ---------------------------------------------------------------------------
# Report: print the summary tables computed from `stats` and the counters.
# Every print() call takes one pre-built string, so the output is the same
# under the Python 2 print statement and the Python 3 print function.
# ---------------------------------------------------------------------------

def _share(counter, key=True):
    """Return counter[key] as a fraction of everything counted."""
    return counter[key] / float(sum(counter.values()))


def _mean_sd(values):
    """Format values as 'mean (std)' using numpy."""
    return str(np.mean(values)) + " (" + str(np.std(values)) + ")"


def _truthy(values):
    """Return the truthy entries of values as a list."""
    return [v for v in values if v]


def _present(values):
    """Return the entries of values that are not None as a list."""
    return [v for v in values if v is not None]


print("Total: " + str(total))
print("Has Procedure: " + str(has_procedure))
print("Has Baseline: " + str(has_baseline))
print("Has Follow up: " + str(has_followup))
print("No Baseline:")
print(no_baseline)

print("Demographics:")
print("Num: " + str(len(stats['procedure_date'])))
sex = Counter(stats['sex'])
print("Male: " + str(_share(sex, "Male")))

print("\nMGH Care:")
# Inter-quartile range: 75th percentile minus 25th percentile.
iqr = np.subtract(*np.percentile(stats['n_enc'], [75, 25]))
print("Median Pre-Procedure Encounters: " + str(np.median(stats['n_enc'])) + " (" + str(iqr) + ")")

print("\nDiagnoses:")
ischemic = Counter(stats['ischemic'])
print("Ischemic: " + str(_share(ischemic)))
nonischemic = Counter(stats['non-ischemic'])
print("Non-Ischemic: " + str(_share(nonischemic)))
lbbb = Counter(stats['lbbb'])
print("lbbb: " + str(_share(lbbb)))
arrhythmia = Counter(stats['arrhythmia'])
print("arrhythmia: " + str(_share(arrhythmia)))
av_block = Counter(stats['av_block'])
print("av_block: " + str(_share(av_block)))
afib = Counter(stats['afib'])
print("afib: " + str(_share(afib)))
crt_in = Counter(stats['crt_in'])
print("crt_in: " + str(_share(crt_in)))
# 'crt_out' is not reported: no such column is collected in stats.

print("\nComorbidities:")
cpd = Counter(stats['cpd'])
print("cpd: " + str(_share(cpd)))
diabetes = Counter(stats['diabetes'])
print("diabetes: " + str(_share(diabetes)))
renal_disease = Counter(stats['renal_disease'])
print("renal_disease: " + str(_share(renal_disease)))

print("\nBaseline Data:")
# Baseline columns keep truthy values only, so None and 0 are both dropped.
lvef_array = _truthy(stats['baseline_lvef'])
print("LVEF: " + _mean_sd(lvef_array))
creatinine_array = _truthy(stats['baseline_creatinine'])
print("Creatinine: " + _mean_sd(creatinine_array))
sodium_array = _truthy(stats['baseline_sodium'])
print("Sodium: " + _mean_sd(sodium_array))
hgb_array = _truthy(stats['baseline_hgb'])
print("HGB: " + _mean_sd(hgb_array))

print("\nMedications:")

print("\nYear:")
print("Earliest: " + str(sorted(stats['procedure_date'])[:10]))
print("Latest: " + str(max(stats['procedure_date'])))
pre_2009 = Counter(d < date(2009, 1, 1) for d in stats['procedure_date'])
print("Pre-2009: " + str(_share(pre_2009)))
p_2009_2012 = Counter(
    d >= date(2009, 1, 1) and d < date(2013, 1, 1) for d in stats['procedure_date']
)
print("2009-2012: " + str(_share(p_2009_2012)))
p_2012 = Counter(d >= date(2013, 1, 1) for d in stats['procedure_date'])
print("post-2012: " + str(_share(p_2012)))

print("\nTable 2")
# Table 2 columns drop only None, so a value of 0 is kept.
base_lvef_days = _present(stats['baseline_days'])
print("Baseline Days: " + _mean_sd(base_lvef_days))
lvef_days = _present(stats['followup_days'])
print("Followup Days: " + _mean_sd(lvef_days))
lvef_followup = _present(stats['lvef_followup'])
print("Followup LVEF: " + _mean_sd(lvef_followup))
lvef_change = _present(stats['lvef_change'])
print("LVEF Change: " + _mean_sd(lvef_change))


def change_to_response(x):
    """Bucket an LVEF change: below 5, below 15, or anything else."""
    for upper_bound, label in ((5, "Non-Responder"), (15, "Responder")):
        if x < upper_bound:
            return label
    return "Super-Responder"


lvef_response = Counter(change_to_response(delta) for delta in lvef_change)
print("Non-Responder: " + str(_share(lvef_response, 'Non-Responder')))
print("Responder: " + str(_share(lvef_response, 'Responder')))
print("Super-Responder: " + str(_share(lvef_response, 'Super-Responder')))
died_in_year = Counter(stats['died_in_year'])
print("Died within 1 year: " + str(_share(died_in_year)))