[8d2107]: / palCareEval.py

Download this file

158 lines (140 with data), 5.3 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
from collections import Counter
from datetime import datetime, timedelta
from loader import get_data
class Range:
    """Inclusive interval of ICD-9 codes that compares equal to any code in it.

    Because equality is overloaded, ``code in [Range(...), Range(...)]``
    is True when ``code`` falls inside any of the listed intervals.
    """

    def __init__(self, low, high):
        """Store both bounds in numeric form.

        ``low`` and ``high`` may be numbers or ICD-9 code strings; they are
        converted with :meth:`icd9_to_float`, which raises if a bound cannot
        be converted.
        """
        self.low = self.icd9_to_float(low)
        self.high = self.icd9_to_float(high)

    def icd9_to_float(self, icd9):
        """Map an ICD-9 code to a float so codes can be compared numerically.

        Plain numeric codes are returned as ``float(icd9)``.  When that
        fails with ``ValueError`` the first character is treated as a letter
        prefix and the result is ``ord(letter) * 1000 + float(rest)``, which
        places letter-prefixed codes far above the purely numeric ones.

        Raises ``ValueError`` when the remainder after the first character
        is not numeric, and ``TypeError`` when ``icd9`` is not something
        ``float`` accepts (for example ``None``).
        """
        try:
            return float(icd9)
        except ValueError:
            extension = float(icd9[1:])
            letter = icd9[0]
            return ord(letter) * 1000 + extension

    def __eq__(self, other):
        """Return True when ``other`` lies within ``[low, high]``.

        Another ``Range`` is equal when both bounds match.  Values that
        cannot be interpreted as an ICD-9 code compare unequal instead of
        raising, so membership tests are safe on dirty data.
        """
        if isinstance(other, Range):
            return self.low == other.low and self.high == other.high
        try:
            value = self.icd9_to_float(other)
        except (AttributeError, TypeError, ValueError):
            # Not a code at all (None, empty string, free text, ...).
            return False
        return self.low <= value <= self.high

    def __ne__(self, other):
        """Return the negation of ``__eq__``."""
        return not self.__eq__(other)

    def __repr__(self):
        return "Range(%r, %r)" % (self.low, self.high)
# Number of records requested from the loader (indices 0 .. num_patients - 1).
num_patients = 1579
#num_patients = 100

# Result accumulators.  Only patients reported dead who also have a consult
# date contribute entries.
dead_patients = []
gender = []
age = []
is_cancer = []
consult_time = []
cancer_time = []
cancer_mgh_time = []
cardio_mgh_time = []
noncancer_time = []
utilization = []
utilization_cancer = []
utilization_mgh_cancer = []
utilization_noncancer = []
utilization_mgh_cardio = []

# Loop invariants, built once instead of once per patient.
cancer_icds = [Range(140.00, 209.99), Range(230.00, 239.99)]
utilization_cutoff = timedelta(days=30*3)
mgh_lookback = timedelta(days=365)
date_format = "%m/%d/%Y"

for i in range(num_patients):
    p = get_data([i])[0]

    # Filter to only dead patients
    if p['Vital_status'] != 'Date of Death reported from SS Death Master File':
        # Make any vital-status value other than the two expected ones visible.
        if p['Vital_status'] != 'Not reported as deceased':
            print("***********************")
            print(p['Vital_status'])
            print("***********************")
        continue
    if p['Consult_Date'] in (None, ''):
        continue

    dead_patients.append(p['EMPI'])
    gender.append(p['Gender'])

    # Dates
    dob = datetime.strptime(p['Date_of_Birth'], date_format)
    # NOTE(review): this key ends in '\r' -- presumably a line-ending artifact
    # of how the loader reads its header; confirm before renaming.
    dod = datetime.strptime(p['Date_Of_Death\r'], date_format)
    doc = datetime.strptime(p['Consult_Date'], date_format)
    age.append((dod - dob).days/365.0)
    timing = dod - doc  # time from consult to death
    consult_time.append(timing.days)

    # Diagnoses
    is_cancer_patient = False
    for d in p['Dia']:
        if d['Code_Type'] != 'ICD9':
            continue
        try:
            if d['Code'] in cancer_icds:
                is_cancer_patient = True
                break  # one matching diagnosis is enough
        except (KeyError, TypeError, ValueError):
            # A code that cannot be compared against the ranges is treated
            # as non-cancer (best effort, as before).
            pass
    is_cancer.append(is_cancer_patient)

    # Stratify
    if is_cancer_patient:
        cancer_time.append(timing.days)
    else:
        noncancer_time.append(timing.days)

    # Utilization
    mgh_onc_enc_count = 0
    mgh_cardio_enc_count = 0
    num_eol_enc = 0
    len_eol_enc = 0
    died_in_hospital = False
    for enc in p['Enc']:
        admitted = datetime.strptime(enc['Admit_Date'], date_format)
        before_death = dod - admitted

        # MGH?
        if before_death < mgh_lookback:
            if enc['Clinic_Name'] == 'Medical Oncology Group (609)':
                mgh_onc_enc_count += 1
            elif enc['Clinic_Name'] == 'Cardiology (12)':
                mgh_cardio_enc_count += 1

        if before_death < utilization_cutoff:
            num_eol_enc += 1
            try:
                discharged = datetime.strptime(enc['Discharge_Date'], date_format)
                if enc['Inpatient_Outpatient'] == 'Inpatient':
                    len_eol_enc += (discharged - admitted).days
                    # NOTE(review): nesting reconstructed from a listing with
                    # no indentation; confirm this check is meant to apply to
                    # inpatient encounters only.
                    if dod == discharged:
                        died_in_hospital = True
            except (KeyError, TypeError, ValueError):
                # Missing or malformed discharge data: the encounter is still
                # counted but adds no length of stay.
                pass

    eol_record = (timing.days, num_eol_enc, len_eol_enc, died_in_hospital)
    utilization.append(eol_record)
    if is_cancer_patient:
        utilization_cancer.append(eol_record)
    else:
        utilization_noncancer.append(eol_record)

    # Is an MGH Oncology patient
    if mgh_onc_enc_count >= 2:
        utilization_mgh_cancer.append(eol_record)
        cancer_mgh_time.append(timing.days)
    if not is_cancer_patient and mgh_cardio_enc_count >= 2:
        utilization_mgh_cardio.append(eol_record)
        cardio_mgh_time.append(timing.days)
# Report section: dumps the collected lists to stdout.  Single-argument
# print(...) calls are used so the output is the same under Python 2 and the
# code also parses under Python 3.

# Step 1:
# NOTE: the printed value is a fraction (count / num_patients), not scaled by 100.
print("Percent of patients dead: " + str(float(len(dead_patients))/num_patients))

# Step 2:
# Age
print("Age distribution:")
print(age)
# Gender
print("Gender Distribution: " + str(Counter(gender)))
# Cancer?
print("Is Cancer Patient: " + str(Counter(is_cancer)))

# Steps 3-5: each entry is printed as a label line followed by the raw list.
for report_label, report_values in [
    # Step 3:
    ("Consult timing distribution:", consult_time),
    # Step 4:
    ("Cancer patient consult times:", cancer_time),
    ("MGH Cancer patient consult times:", cancer_mgh_time),
    ("Non-Cancer patient consult times:", noncancer_time),
    ("MGH Cardio patient consult times:", cardio_mgh_time),
    # Step 5:
    ("Utilization", utilization),
    ("Utilization: Cancer", utilization_cancer),
    ("Utilization: MGH Cancer", utilization_mgh_cancer),
    ("Utilization: Non Cancer", utilization_noncancer),
    ("Utilization: MGH Cardio", utilization_mgh_cardio),
]:
    print(report_label)
    print(report_values)