Switch to unified view

a b/Clinical Deterioration Prediction Model - Selection of Ensemble Algorithms .ipynb
1
{
2
 "cells": [
3
  {
4
   "cell_type": "markdown",
5
   "metadata": {
6
    "hide": true
7
   },
8
   "source": [
9
    "import numpy as np\n",
10
    "import os\n",
11
    "import pandas as pd\n",
12
    "from sklearn.model_selection import train_test_split\n",
13
    "from sklearn.ensemble import RandomForestClassifier\n",
14
    "# ^^^ pyforest auto-imports - don't write above this line\n",
15
    "import numpy as np\n",
16
    "import matplotlib.pyplot as plt\n",
17
    "import pandas as pd\n",
18
    "import os\n",
19
    "import sklearn\n",
20
    "import matplotlib.pyplot as plt\n",
21
    "import seaborn as sns\n",
22
    "import pandas as pd\n",
23
    "import os\n",
24
    "\n",
25
    "$$\n",
26
    "\\renewcommand{\\like}{{\\cal L}}\n",
27
    "\\renewcommand{\\loglike}{{\\ell}}\n",
28
    "\\renewcommand{\\err}{{\\cal E}}\n",
29
    "\\renewcommand{\\dat}{{\\cal D}}\n",
30
    "\\renewcommand{\\hyp}{{\\cal H}}\n",
31
    "\\renewcommand{\\Ex}[2]{E_{#1}[#2]}\n",
32
    "\\renewcommand{\\x}{{\\mathbf x}}\n",
33
    "\\renewcommand{\\v}[1]{{\\mathbf #1}}\n",
34
    "$$"
35
   ]
36
  },
37
  {
38
   "cell_type": "markdown",
39
   "metadata": {},
40
   "source": [
41
    "# Clinical Deterioration Prediction Model - Selection of Ensemble Algorithms "
42
   ]
43
  },
44
  {
45
   "cell_type": "markdown",
46
   "metadata": {},
47
   "source": [
48
    "## Data\n",
49
    "\n",
50
    "The final dataset used for the inferential statistics project includes unique ICU admission of 46,234 patients’ demographic (age), vital (blood pressure, heart rate, body temperature, and Glasgow Comma Scale), underlying conditions (HIV, metastatic cancer, and hematologic malignancy), admission type (scheduled surgical, medical, or unscheduled surgical), renal (urinary output, and Blood Urea Nitrogen), and others (serum bicarbonate level, sodium level, potassium level, and bilirubin level) data. This dataset is build based on the commonly used mortality prediction tool, Simplified Acute Physiology Score II (SAPSII). "
51
   ]
52
  },
53
  {
54
   "cell_type": "code",
55
   "execution_count": 1,
56
   "metadata": {},
57
   "outputs": [
58
    {
59
     "data": {
60
      "application/javascript": [
61
       "\n",
62
       "        if (window._pyforest_update_imports_cell) { window._pyforest_update_imports_cell('import os'); }\n",
63
       "    "
64
      ],
65
      "text/plain": [
66
       "<IPython.core.display.Javascript object>"
67
      ]
68
     },
69
     "metadata": {},
70
     "output_type": "display_data"
71
    },
72
    {
73
     "data": {
74
      "text/plain": [
75
       "'C:\\\\Users\\\\abebu\\\\Dropbox\\\\Data Science\\\\Projects\\\\Capstone Project 1\\\\Potential Projects\\\\9. MIMIC\\\\Machine Learning\\\\Clinical-Deterioration-Prediction-Model---Bayesian-Linear-Regression'"
76
      ]
77
     },
78
     "execution_count": 1,
79
     "metadata": {},
80
     "output_type": "execute_result"
81
    }
82
   ],
83
   "source": [
84
    "os.getcwd()"
85
   ]
86
  },
87
  {
88
   "cell_type": "code",
89
   "execution_count": 3,
90
   "metadata": {},
91
   "outputs": [
92
    {
93
     "data": {
94
      "application/javascript": [
95
       "\n",
96
       "        if (window._pyforest_update_imports_cell) { window._pyforest_update_imports_cell('import os'); }\n",
97
       "    "
98
      ],
99
      "text/plain": [
100
       "<IPython.core.display.Javascript object>"
101
      ]
102
     },
103
     "metadata": {},
104
     "output_type": "display_data"
105
    },
106
    {
107
     "data": {
108
      "application/javascript": [
109
       "\n",
110
       "        if (window._pyforest_update_imports_cell) { window._pyforest_update_imports_cell('import os\\nimport pandas as pd'); }\n",
111
       "    "
112
      ],
113
      "text/plain": [
114
       "<IPython.core.display.Javascript object>"
115
      ]
116
     },
117
     "metadata": {},
118
     "output_type": "display_data"
119
    },
120
    {
121
     "data": {
122
      "text/html": [
123
       "<div>\n",
124
       "<style scoped>\n",
125
       "    .dataframe tbody tr th:only-of-type {\n",
126
       "        vertical-align: middle;\n",
127
       "    }\n",
128
       "\n",
129
       "    .dataframe tbody tr th {\n",
130
       "        vertical-align: top;\n",
131
       "    }\n",
132
       "\n",
133
       "    .dataframe thead th {\n",
134
       "        text-align: right;\n",
135
       "    }\n",
136
       "</style>\n",
137
       "<table border=\"1\" class=\"dataframe\">\n",
138
       "  <thead>\n",
139
       "    <tr style=\"text-align: right;\">\n",
140
       "      <th></th>\n",
141
       "      <th>SUBJECT_ID</th>\n",
142
       "      <th>HADM_ID</th>\n",
143
       "      <th>ICUSTAY_ID</th>\n",
144
       "      <th>los</th>\n",
145
       "      <th>hdeath</th>\n",
146
       "      <th>death</th>\n",
147
       "      <th>admission</th>\n",
148
       "      <th>ud</th>\n",
149
       "      <th>bun</th>\n",
150
       "      <th>Bicarbonate</th>\n",
151
       "      <th>...</th>\n",
152
       "      <th>WBC_3.0</th>\n",
153
       "      <th>hr_0.0</th>\n",
154
       "      <th>hr_2.0</th>\n",
155
       "      <th>hr_4.0</th>\n",
156
       "      <th>hr_7.0</th>\n",
157
       "      <th>hr_11.0</th>\n",
158
       "      <th>bp_0.0</th>\n",
159
       "      <th>bp_2.0</th>\n",
160
       "      <th>bp_5.0</th>\n",
161
       "      <th>bp_13.0</th>\n",
162
       "    </tr>\n",
163
       "  </thead>\n",
164
       "  <tbody>\n",
165
       "    <tr>\n",
166
       "      <td>0</td>\n",
167
       "      <td>268</td>\n",
168
       "      <td>110404</td>\n",
169
       "      <td>280836</td>\n",
170
       "      <td>3.2490</td>\n",
171
       "      <td>1</td>\n",
172
       "      <td>1</td>\n",
173
       "      <td>8</td>\n",
174
       "      <td>0.0</td>\n",
175
       "      <td>6.0</td>\n",
176
       "      <td>0.0</td>\n",
177
       "      <td>...</td>\n",
178
       "      <td>0</td>\n",
179
       "      <td>0</td>\n",
180
       "      <td>0</td>\n",
181
       "      <td>0</td>\n",
182
       "      <td>0</td>\n",
183
       "      <td>1</td>\n",
184
       "      <td>0</td>\n",
185
       "      <td>0</td>\n",
186
       "      <td>0</td>\n",
187
       "      <td>1</td>\n",
188
       "    </tr>\n",
189
       "    <tr>\n",
190
       "      <td>1</td>\n",
191
       "      <td>269</td>\n",
192
       "      <td>106296</td>\n",
193
       "      <td>206613</td>\n",
194
       "      <td>3.2788</td>\n",
195
       "      <td>0</td>\n",
196
       "      <td>0</td>\n",
197
       "      <td>8</td>\n",
198
       "      <td>17.0</td>\n",
199
       "      <td>0.0</td>\n",
200
       "      <td>0.0</td>\n",
201
       "      <td>...</td>\n",
202
       "      <td>0</td>\n",
203
       "      <td>1</td>\n",
204
       "      <td>0</td>\n",
205
       "      <td>0</td>\n",
206
       "      <td>0</td>\n",
207
       "      <td>0</td>\n",
208
       "      <td>0</td>\n",
209
       "      <td>0</td>\n",
210
       "      <td>1</td>\n",
211
       "      <td>0</td>\n",
212
       "    </tr>\n",
213
       "    <tr>\n",
214
       "      <td>2</td>\n",
215
       "      <td>270</td>\n",
216
       "      <td>188028</td>\n",
217
       "      <td>220345</td>\n",
218
       "      <td>2.8939</td>\n",
219
       "      <td>0</td>\n",
220
       "      <td>0</td>\n",
221
       "      <td>0</td>\n",
222
       "      <td>0.0</td>\n",
223
       "      <td>0.0</td>\n",
224
       "      <td>0.0</td>\n",
225
       "      <td>...</td>\n",
226
       "      <td>0</td>\n",
227
       "      <td>0</td>\n",
228
       "      <td>0</td>\n",
229
       "      <td>0</td>\n",
230
       "      <td>0</td>\n",
231
       "      <td>1</td>\n",
232
       "      <td>0</td>\n",
233
       "      <td>0</td>\n",
234
       "      <td>0</td>\n",
235
       "      <td>1</td>\n",
236
       "    </tr>\n",
237
       "    <tr>\n",
238
       "      <td>3</td>\n",
239
       "      <td>271</td>\n",
240
       "      <td>173727</td>\n",
241
       "      <td>249196</td>\n",
242
       "      <td>2.0600</td>\n",
243
       "      <td>0</td>\n",
244
       "      <td>0</td>\n",
245
       "      <td>8</td>\n",
246
       "      <td>0.0</td>\n",
247
       "      <td>0.0</td>\n",
248
       "      <td>0.0</td>\n",
249
       "      <td>...</td>\n",
250
       "      <td>0</td>\n",
251
       "      <td>1</td>\n",
252
       "      <td>0</td>\n",
253
       "      <td>0</td>\n",
254
       "      <td>0</td>\n",
255
       "      <td>0</td>\n",
256
       "      <td>1</td>\n",
257
       "      <td>0</td>\n",
258
       "      <td>0</td>\n",
259
       "      <td>0</td>\n",
260
       "    </tr>\n",
261
       "    <tr>\n",
262
       "      <td>4</td>\n",
263
       "      <td>272</td>\n",
264
       "      <td>164716</td>\n",
265
       "      <td>210407</td>\n",
266
       "      <td>1.6202</td>\n",
267
       "      <td>0</td>\n",
268
       "      <td>0</td>\n",
269
       "      <td>8</td>\n",
270
       "      <td>0.0</td>\n",
271
       "      <td>0.0</td>\n",
272
       "      <td>0.0</td>\n",
273
       "      <td>...</td>\n",
274
       "      <td>0</td>\n",
275
       "      <td>1</td>\n",
276
       "      <td>0</td>\n",
277
       "      <td>0</td>\n",
278
       "      <td>0</td>\n",
279
       "      <td>0</td>\n",
280
       "      <td>0</td>\n",
281
       "      <td>0</td>\n",
282
       "      <td>1</td>\n",
283
       "      <td>0</td>\n",
284
       "    </tr>\n",
285
       "  </tbody>\n",
286
       "</table>\n",
287
       "<p>5 rows × 33 columns</p>\n",
288
       "</div>"
289
      ],
290
      "text/plain": [
291
       "   SUBJECT_ID  HADM_ID  ICUSTAY_ID     los  hdeath  death  admission    ud  \\\n",
292
       "0         268   110404      280836  3.2490       1      1          8   0.0   \n",
293
       "1         269   106296      206613  3.2788       0      0          8  17.0   \n",
294
       "2         270   188028      220345  2.8939       0      0          0   0.0   \n",
295
       "3         271   173727      249196  2.0600       0      0          8   0.0   \n",
296
       "4         272   164716      210407  1.6202       0      0          8   0.0   \n",
297
       "\n",
298
       "   bun  Bicarbonate  ...  WBC_3.0  hr_0.0  hr_2.0  hr_4.0  hr_7.0  hr_11.0  \\\n",
299
       "0  6.0          0.0  ...        0       0       0       0       0        1   \n",
300
       "1  0.0          0.0  ...        0       1       0       0       0        0   \n",
301
       "2  0.0          0.0  ...        0       0       0       0       0        1   \n",
302
       "3  0.0          0.0  ...        0       1       0       0       0        0   \n",
303
       "4  0.0          0.0  ...        0       1       0       0       0        0   \n",
304
       "\n",
305
       "   bp_0.0  bp_2.0  bp_5.0  bp_13.0  \n",
306
       "0       0       0       0        1  \n",
307
       "1       0       0       1        0  \n",
308
       "2       0       0       0        1  \n",
309
       "3       1       0       0        0  \n",
310
       "4       0       0       1        0  \n",
311
       "\n",
312
       "[5 rows x 33 columns]"
313
      ]
314
     },
315
     "execution_count": 3,
316
     "metadata": {},
317
     "output_type": "execute_result"
318
    }
319
   ],
320
   "source": [
321
    "os.chdir(\"C://Users/abebu/Google Drive/mimic-iii-clinical-database-1.4\")\n",
322
    "saps = pd.read_csv(\"saps_ts.csv\", header=0, index_col=0)\n",
323
    "saps.head()\n"
324
   ]
325
  },
326
  {
327
   "cell_type": "code",
328
   "execution_count": 4,
329
   "metadata": {},
330
   "outputs": [
331
    {
332
     "data": {
333
      "text/html": [
334
       "<div>\n",
335
       "<style scoped>\n",
336
       "    .dataframe tbody tr th:only-of-type {\n",
337
       "        vertical-align: middle;\n",
338
       "    }\n",
339
       "\n",
340
       "    .dataframe tbody tr th {\n",
341
       "        vertical-align: top;\n",
342
       "    }\n",
343
       "\n",
344
       "    .dataframe thead th {\n",
345
       "        text-align: right;\n",
346
       "    }\n",
347
       "</style>\n",
348
       "<table border=\"1\" class=\"dataframe\">\n",
349
       "  <thead>\n",
350
       "    <tr style=\"text-align: right;\">\n",
351
       "      <th></th>\n",
352
       "      <th>hdeath</th>\n",
353
       "      <th>admission</th>\n",
354
       "      <th>ud</th>\n",
355
       "      <th>bun</th>\n",
356
       "      <th>Bicarbonate</th>\n",
357
       "      <th>ventilation</th>\n",
358
       "      <th>Temp</th>\n",
359
       "      <th>Bilirubin</th>\n",
360
       "      <th>gcs</th>\n",
361
       "      <th>AGE</th>\n",
362
       "      <th>...</th>\n",
363
       "      <th>WBC_3.0</th>\n",
364
       "      <th>hr_0.0</th>\n",
365
       "      <th>hr_2.0</th>\n",
366
       "      <th>hr_4.0</th>\n",
367
       "      <th>hr_7.0</th>\n",
368
       "      <th>hr_11.0</th>\n",
369
       "      <th>bp_0.0</th>\n",
370
       "      <th>bp_2.0</th>\n",
371
       "      <th>bp_5.0</th>\n",
372
       "      <th>bp_13.0</th>\n",
373
       "    </tr>\n",
374
       "  </thead>\n",
375
       "  <tbody>\n",
376
       "    <tr>\n",
377
       "      <td>0</td>\n",
378
       "      <td>1</td>\n",
379
       "      <td>8</td>\n",
380
       "      <td>0.0</td>\n",
381
       "      <td>6.0</td>\n",
382
       "      <td>0.0</td>\n",
383
       "      <td>6.0</td>\n",
384
       "      <td>0.0</td>\n",
385
       "      <td>0.0</td>\n",
386
       "      <td>26.0</td>\n",
387
       "      <td>12.0</td>\n",
388
       "      <td>...</td>\n",
389
       "      <td>0</td>\n",
390
       "      <td>0</td>\n",
391
       "      <td>0</td>\n",
392
       "      <td>0</td>\n",
393
       "      <td>0</td>\n",
394
       "      <td>1</td>\n",
395
       "      <td>0</td>\n",
396
       "      <td>0</td>\n",
397
       "      <td>0</td>\n",
398
       "      <td>1</td>\n",
399
       "    </tr>\n",
400
       "    <tr>\n",
401
       "      <td>1</td>\n",
402
       "      <td>0</td>\n",
403
       "      <td>8</td>\n",
404
       "      <td>17.0</td>\n",
405
       "      <td>0.0</td>\n",
406
       "      <td>0.0</td>\n",
407
       "      <td>0.0</td>\n",
408
       "      <td>0.0</td>\n",
409
       "      <td>0.0</td>\n",
410
       "      <td>0.0</td>\n",
411
       "      <td>7.0</td>\n",
412
       "      <td>...</td>\n",
413
       "      <td>0</td>\n",
414
       "      <td>1</td>\n",
415
       "      <td>0</td>\n",
416
       "      <td>0</td>\n",
417
       "      <td>0</td>\n",
418
       "      <td>0</td>\n",
419
       "      <td>0</td>\n",
420
       "      <td>0</td>\n",
421
       "      <td>1</td>\n",
422
       "      <td>0</td>\n",
423
       "    </tr>\n",
424
       "    <tr>\n",
425
       "      <td>2</td>\n",
426
       "      <td>0</td>\n",
427
       "      <td>0</td>\n",
428
       "      <td>0.0</td>\n",
429
       "      <td>0.0</td>\n",
430
       "      <td>0.0</td>\n",
431
       "      <td>0.0</td>\n",
432
       "      <td>3.0</td>\n",
433
       "      <td>0.0</td>\n",
434
       "      <td>0.0</td>\n",
435
       "      <td>18.0</td>\n",
436
       "      <td>...</td>\n",
437
       "      <td>0</td>\n",
438
       "      <td>0</td>\n",
439
       "      <td>0</td>\n",
440
       "      <td>0</td>\n",
441
       "      <td>0</td>\n",
442
       "      <td>1</td>\n",
443
       "      <td>0</td>\n",
444
       "      <td>0</td>\n",
445
       "      <td>0</td>\n",
446
       "      <td>1</td>\n",
447
       "    </tr>\n",
448
       "    <tr>\n",
449
       "      <td>3</td>\n",
450
       "      <td>0</td>\n",
451
       "      <td>8</td>\n",
452
       "      <td>0.0</td>\n",
453
       "      <td>0.0</td>\n",
454
       "      <td>0.0</td>\n",
455
       "      <td>6.0</td>\n",
456
       "      <td>3.0</td>\n",
457
       "      <td>0.0</td>\n",
458
       "      <td>0.0</td>\n",
459
       "      <td>7.0</td>\n",
460
       "      <td>...</td>\n",
461
       "      <td>0</td>\n",
462
       "      <td>1</td>\n",
463
       "      <td>0</td>\n",
464
       "      <td>0</td>\n",
465
       "      <td>0</td>\n",
466
       "      <td>0</td>\n",
467
       "      <td>1</td>\n",
468
       "      <td>0</td>\n",
469
       "      <td>0</td>\n",
470
       "      <td>0</td>\n",
471
       "    </tr>\n",
472
       "    <tr>\n",
473
       "      <td>4</td>\n",
474
       "      <td>0</td>\n",
475
       "      <td>8</td>\n",
476
       "      <td>0.0</td>\n",
477
       "      <td>0.0</td>\n",
478
       "      <td>0.0</td>\n",
479
       "      <td>0.0</td>\n",
480
       "      <td>3.0</td>\n",
481
       "      <td>0.0</td>\n",
482
       "      <td>0.0</td>\n",
483
       "      <td>12.0</td>\n",
484
       "      <td>...</td>\n",
485
       "      <td>0</td>\n",
486
       "      <td>1</td>\n",
487
       "      <td>0</td>\n",
488
       "      <td>0</td>\n",
489
       "      <td>0</td>\n",
490
       "      <td>0</td>\n",
491
       "      <td>0</td>\n",
492
       "      <td>0</td>\n",
493
       "      <td>1</td>\n",
494
       "      <td>0</td>\n",
495
       "    </tr>\n",
496
       "    <tr>\n",
497
       "      <td>...</td>\n",
498
       "      <td>...</td>\n",
499
       "      <td>...</td>\n",
500
       "      <td>...</td>\n",
501
       "      <td>...</td>\n",
502
       "      <td>...</td>\n",
503
       "      <td>...</td>\n",
504
       "      <td>...</td>\n",
505
       "      <td>...</td>\n",
506
       "      <td>...</td>\n",
507
       "      <td>...</td>\n",
508
       "      <td>...</td>\n",
509
       "      <td>...</td>\n",
510
       "      <td>...</td>\n",
511
       "      <td>...</td>\n",
512
       "      <td>...</td>\n",
513
       "      <td>...</td>\n",
514
       "      <td>...</td>\n",
515
       "      <td>...</td>\n",
516
       "      <td>...</td>\n",
517
       "      <td>...</td>\n",
518
       "      <td>...</td>\n",
519
       "    </tr>\n",
520
       "    <tr>\n",
521
       "      <td>61112</td>\n",
522
       "      <td>0</td>\n",
523
       "      <td>8</td>\n",
524
       "      <td>0.0</td>\n",
525
       "      <td>0.0</td>\n",
526
       "      <td>0.0</td>\n",
527
       "      <td>6.0</td>\n",
528
       "      <td>3.0</td>\n",
529
       "      <td>0.0</td>\n",
530
       "      <td>26.0</td>\n",
531
       "      <td>16.0</td>\n",
532
       "      <td>...</td>\n",
533
       "      <td>0</td>\n",
534
       "      <td>0</td>\n",
535
       "      <td>1</td>\n",
536
       "      <td>0</td>\n",
537
       "      <td>0</td>\n",
538
       "      <td>0</td>\n",
539
       "      <td>0</td>\n",
540
       "      <td>0</td>\n",
541
       "      <td>0</td>\n",
542
       "      <td>1</td>\n",
543
       "    </tr>\n",
544
       "    <tr>\n",
545
       "      <td>61113</td>\n",
546
       "      <td>0</td>\n",
547
       "      <td>8</td>\n",
548
       "      <td>0.0</td>\n",
549
       "      <td>0.0</td>\n",
550
       "      <td>0.0</td>\n",
551
       "      <td>0.0</td>\n",
552
       "      <td>3.0</td>\n",
553
       "      <td>0.0</td>\n",
554
       "      <td>5.0</td>\n",
555
       "      <td>18.0</td>\n",
556
       "      <td>...</td>\n",
557
       "      <td>0</td>\n",
558
       "      <td>0</td>\n",
559
       "      <td>1</td>\n",
560
       "      <td>0</td>\n",
561
       "      <td>0</td>\n",
562
       "      <td>0</td>\n",
563
       "      <td>1</td>\n",
564
       "      <td>0</td>\n",
565
       "      <td>0</td>\n",
566
       "      <td>0</td>\n",
567
       "    </tr>\n",
568
       "    <tr>\n",
569
       "      <td>61114</td>\n",
570
       "      <td>0</td>\n",
571
       "      <td>0</td>\n",
572
       "      <td>0.0</td>\n",
573
       "      <td>0.0</td>\n",
574
       "      <td>0.0</td>\n",
575
       "      <td>6.0</td>\n",
576
       "      <td>3.0</td>\n",
577
       "      <td>0.0</td>\n",
578
       "      <td>7.0</td>\n",
579
       "      <td>7.0</td>\n",
580
       "      <td>...</td>\n",
581
       "      <td>0</td>\n",
582
       "      <td>1</td>\n",
583
       "      <td>0</td>\n",
584
       "      <td>0</td>\n",
585
       "      <td>0</td>\n",
586
       "      <td>0</td>\n",
587
       "      <td>0</td>\n",
588
       "      <td>0</td>\n",
589
       "      <td>1</td>\n",
590
       "      <td>0</td>\n",
591
       "    </tr>\n",
592
       "    <tr>\n",
593
       "      <td>61115</td>\n",
594
       "      <td>0</td>\n",
595
       "      <td>0</td>\n",
596
       "      <td>0.0</td>\n",
597
       "      <td>0.0</td>\n",
598
       "      <td>0.0</td>\n",
599
       "      <td>6.0</td>\n",
600
       "      <td>3.0</td>\n",
601
       "      <td>0.0</td>\n",
602
       "      <td>26.0</td>\n",
603
       "      <td>12.0</td>\n",
604
       "      <td>...</td>\n",
605
       "      <td>0</td>\n",
606
       "      <td>1</td>\n",
607
       "      <td>0</td>\n",
608
       "      <td>0</td>\n",
609
       "      <td>0</td>\n",
610
       "      <td>0</td>\n",
611
       "      <td>0</td>\n",
612
       "      <td>0</td>\n",
613
       "      <td>0</td>\n",
614
       "      <td>1</td>\n",
615
       "    </tr>\n",
616
       "    <tr>\n",
617
       "      <td>61116</td>\n",
618
       "      <td>0</td>\n",
619
       "      <td>8</td>\n",
620
       "      <td>0.0</td>\n",
621
       "      <td>0.0</td>\n",
622
       "      <td>0.0</td>\n",
623
       "      <td>11.0</td>\n",
624
       "      <td>3.0</td>\n",
625
       "      <td>0.0</td>\n",
626
       "      <td>26.0</td>\n",
627
       "      <td>0.0</td>\n",
628
       "      <td>...</td>\n",
629
       "      <td>0</td>\n",
630
       "      <td>1</td>\n",
631
       "      <td>0</td>\n",
632
       "      <td>0</td>\n",
633
       "      <td>0</td>\n",
634
       "      <td>0</td>\n",
635
       "      <td>0</td>\n",
636
       "      <td>0</td>\n",
637
       "      <td>1</td>\n",
638
       "      <td>0</td>\n",
639
       "    </tr>\n",
640
       "  </tbody>\n",
641
       "</table>\n",
642
       "<p>61117 rows × 28 columns</p>\n",
643
       "</div>"
644
      ],
645
      "text/plain": [
646
       "       hdeath  admission    ud  bun  Bicarbonate  ventilation  Temp  \\\n",
647
       "0           1          8   0.0  6.0          0.0          6.0   0.0   \n",
648
       "1           0          8  17.0  0.0          0.0          0.0   0.0   \n",
649
       "2           0          0   0.0  0.0          0.0          0.0   3.0   \n",
650
       "3           0          8   0.0  0.0          0.0          6.0   3.0   \n",
651
       "4           0          8   0.0  0.0          0.0          0.0   3.0   \n",
652
       "...       ...        ...   ...  ...          ...          ...   ...   \n",
653
       "61112       0          8   0.0  0.0          0.0          6.0   3.0   \n",
654
       "61113       0          8   0.0  0.0          0.0          0.0   3.0   \n",
655
       "61114       0          0   0.0  0.0          0.0          6.0   3.0   \n",
656
       "61115       0          0   0.0  0.0          0.0          6.0   3.0   \n",
657
       "61116       0          8   0.0  0.0          0.0         11.0   3.0   \n",
658
       "\n",
659
       "       Bilirubin   gcs   AGE  ...  WBC_3.0  hr_0.0  hr_2.0  hr_4.0  hr_7.0  \\\n",
660
       "0            0.0  26.0  12.0  ...        0       0       0       0       0   \n",
661
       "1            0.0   0.0   7.0  ...        0       1       0       0       0   \n",
662
       "2            0.0   0.0  18.0  ...        0       0       0       0       0   \n",
663
       "3            0.0   0.0   7.0  ...        0       1       0       0       0   \n",
664
       "4            0.0   0.0  12.0  ...        0       1       0       0       0   \n",
665
       "...          ...   ...   ...  ...      ...     ...     ...     ...     ...   \n",
666
       "61112        0.0  26.0  16.0  ...        0       0       1       0       0   \n",
667
       "61113        0.0   5.0  18.0  ...        0       0       1       0       0   \n",
668
       "61114        0.0   7.0   7.0  ...        0       1       0       0       0   \n",
669
       "61115        0.0  26.0  12.0  ...        0       1       0       0       0   \n",
670
       "61116        0.0  26.0   0.0  ...        0       1       0       0       0   \n",
671
       "\n",
672
       "       hr_11.0  bp_0.0  bp_2.0  bp_5.0  bp_13.0  \n",
673
       "0            1       0       0       0        1  \n",
674
       "1            0       0       0       1        0  \n",
675
       "2            1       0       0       0        1  \n",
676
       "3            0       1       0       0        0  \n",
677
       "4            0       0       0       1        0  \n",
678
       "...        ...     ...     ...     ...      ...  \n",
679
       "61112        0       0       0       0        1  \n",
680
       "61113        0       1       0       0        0  \n",
681
       "61114        0       0       0       1        0  \n",
682
       "61115        0       0       0       0        1  \n",
683
       "61116        0       0       0       1        0  \n",
684
       "\n",
685
       "[61117 rows x 28 columns]"
686
      ]
687
     },
688
     "execution_count": 4,
689
     "metadata": {},
690
     "output_type": "execute_result"
691
    }
692
   ],
693
   "source": [
694
    "saps_e=saps.drop(['los','death','SUBJECT_ID','HADM_ID', 'ICUSTAY_ID'], axis=1)\n",
695
    "saps_e"
696
   ]
697
  },
698
  {
699
   "cell_type": "code",
700
   "execution_count": 12,
701
   "metadata": {},
702
   "outputs": [],
703
   "source": [
704
    "# Pandas and numpy for data manipulation\n",
705
    "import pandas as pd\n",
706
    "import numpy as np\n",
707
    "np.random.seed(42)\n",
708
    "\n",
709
    " \n",
710
    "# Matplotlib and seaborn for plotting\n",
711
    "import matplotlib.pyplot as plt\n",
712
    "%matplotlib inline\n",
713
    "\n",
714
    "import matplotlib\n",
715
    "matplotlib.rcParams['font.size'] = 16\n",
716
    "matplotlib.rcParams['figure.figsize'] = (9, 9)\n",
717
    "\n",
718
    "import seaborn as sns\n",
719
    "\n",
720
    "# Scipy helper functions\n",
721
    "from scipy.stats import percentileofscore\n",
722
    "from scipy import stats"
723
   ]
724
  },
725
  {
726
   "cell_type": "code",
727
   "execution_count": 10,
728
   "metadata": {},
729
   "outputs": [],
730
   "source": [
731
    "# Standard ensembel ML Models for comparison\n",
732
    "\n",
733
    "from sklearn.ensemble import RandomForestClassifier\n",
734
    "from sklearn.ensemble import ExtraTreesClassifier\n",
735
    "from sklearn.ensemble import GradientBoostingClassifier\n",
736
    "\n",
737
    "# Splitting data into training/testing\n",
738
    "from sklearn.model_selection import train_test_split\n",
739
    "from sklearn.preprocessing import MinMaxScaler\n",
740
    "\n",
741
    "# Metrics\n",
742
    "from sklearn.metrics import mean_squared_error, mean_absolute_error, median_absolute_error\n",
743
    "\n",
744
    "# Distributions\n",
745
    "import scipy"
746
   ]
747
  },
748
  {
749
   "cell_type": "code",
750
   "execution_count": 14,
751
   "metadata": {},
752
   "outputs": [],
753
   "source": [
754
    "\n",
755
    "# PyMC3 for Bayesian Inference\n",
756
    "import pymc3 as pm"
757
   ]
758
  },
759
  {
760
   "cell_type": "code",
761
   "execution_count": 5,
762
   "metadata": {},
763
   "outputs": [],
764
   "source": [
765
    "\n",
766
    "# grade and returns training and testing datasets\n",
767
    "def format_data(df):\n",
768
    "    # Target is hospital death\n",
769
    "    labels = saps_e['hdeath']\n",
770
    "    \n",
771
    "    # Drop target (hdeath) from features\n",
772
    "    df = df.drop(columns=['hdeath'])\n",
773
    "    \n",
774
    "    # Split into training/testing sets with 30% split\n",
775
    "    X_train, X_test, y_train, y_test = train_test_split(df, labels, \n",
776
    "                                                        test_size = 0.30,\n",
777
    "                                                        random_state=42)\n",
778
    "    \n",
779
    "    return X_train, X_test, y_train, y_test\n"
780
   ]
781
  },
782
  {
783
   "cell_type": "code",
784
   "execution_count": 6,
785
   "metadata": {},
786
   "outputs": [
787
    {
788
     "data": {
789
      "application/javascript": [
790
       "\n",
791
       "        if (window._pyforest_update_imports_cell) { window._pyforest_update_imports_cell('import os\\nimport pandas as pd\\nfrom sklearn.model_selection import train_test_split'); }\n",
792
       "    "
793
      ],
794
      "text/plain": [
795
       "<IPython.core.display.Javascript object>"
796
      ]
797
     },
798
     "metadata": {},
799
     "output_type": "display_data"
800
    },
801
    {
802
     "data": {
803
      "text/html": [
804
       "<div>\n",
805
       "<style scoped>\n",
806
       "    .dataframe tbody tr th:only-of-type {\n",
807
       "        vertical-align: middle;\n",
808
       "    }\n",
809
       "\n",
810
       "    .dataframe tbody tr th {\n",
811
       "        vertical-align: top;\n",
812
       "    }\n",
813
       "\n",
814
       "    .dataframe thead th {\n",
815
       "        text-align: right;\n",
816
       "    }\n",
817
       "</style>\n",
818
       "<table border=\"1\" class=\"dataframe\">\n",
819
       "  <thead>\n",
820
       "    <tr style=\"text-align: right;\">\n",
821
       "      <th></th>\n",
822
       "      <th>admission</th>\n",
823
       "      <th>ud</th>\n",
824
       "      <th>bun</th>\n",
825
       "      <th>Bicarbonate</th>\n",
826
       "      <th>ventilation</th>\n",
827
       "      <th>Temp</th>\n",
828
       "      <th>Bilirubin</th>\n",
829
       "      <th>gcs</th>\n",
830
       "      <th>AGE</th>\n",
831
       "      <th>UO</th>\n",
832
       "      <th>...</th>\n",
833
       "      <th>WBC_3.0</th>\n",
834
       "      <th>hr_0.0</th>\n",
835
       "      <th>hr_2.0</th>\n",
836
       "      <th>hr_4.0</th>\n",
837
       "      <th>hr_7.0</th>\n",
838
       "      <th>hr_11.0</th>\n",
839
       "      <th>bp_0.0</th>\n",
840
       "      <th>bp_2.0</th>\n",
841
       "      <th>bp_5.0</th>\n",
842
       "      <th>bp_13.0</th>\n",
843
       "    </tr>\n",
844
       "  </thead>\n",
845
       "  <tbody>\n",
846
       "    <tr>\n",
847
       "      <td>53545</td>\n",
848
       "      <td>8</td>\n",
849
       "      <td>0.0</td>\n",
850
       "      <td>0.0</td>\n",
851
       "      <td>0.0</td>\n",
852
       "      <td>0.0</td>\n",
853
       "      <td>3.0</td>\n",
854
       "      <td>0.0</td>\n",
855
       "      <td>0.0</td>\n",
856
       "      <td>15.0</td>\n",
857
       "      <td>0.0</td>\n",
858
       "      <td>...</td>\n",
859
       "      <td>0</td>\n",
860
       "      <td>1</td>\n",
861
       "      <td>0</td>\n",
862
       "      <td>0</td>\n",
863
       "      <td>0</td>\n",
864
       "      <td>0</td>\n",
865
       "      <td>1</td>\n",
866
       "      <td>0</td>\n",
867
       "      <td>0</td>\n",
868
       "      <td>0</td>\n",
869
       "    </tr>\n",
870
       "    <tr>\n",
871
       "      <td>51512</td>\n",
872
       "      <td>8</td>\n",
873
       "      <td>0.0</td>\n",
874
       "      <td>6.0</td>\n",
875
       "      <td>0.0</td>\n",
876
       "      <td>0.0</td>\n",
877
       "      <td>3.0</td>\n",
878
       "      <td>0.0</td>\n",
879
       "      <td>0.0</td>\n",
880
       "      <td>18.0</td>\n",
881
       "      <td>0.0</td>\n",
882
       "      <td>...</td>\n",
883
       "      <td>0</td>\n",
884
       "      <td>0</td>\n",
885
       "      <td>1</td>\n",
886
       "      <td>0</td>\n",
887
       "      <td>0</td>\n",
888
       "      <td>0</td>\n",
889
       "      <td>0</td>\n",
890
       "      <td>0</td>\n",
891
       "      <td>1</td>\n",
892
       "      <td>0</td>\n",
893
       "    </tr>\n",
894
       "    <tr>\n",
895
       "      <td>23837</td>\n",
896
       "      <td>8</td>\n",
897
       "      <td>0.0</td>\n",
898
       "      <td>0.0</td>\n",
899
       "      <td>0.0</td>\n",
900
       "      <td>0.0</td>\n",
901
       "      <td>3.0</td>\n",
902
       "      <td>0.0</td>\n",
903
       "      <td>0.0</td>\n",
904
       "      <td>12.0</td>\n",
905
       "      <td>0.0</td>\n",
906
       "      <td>...</td>\n",
907
       "      <td>0</td>\n",
908
       "      <td>0</td>\n",
909
       "      <td>1</td>\n",
910
       "      <td>0</td>\n",
911
       "      <td>0</td>\n",
912
       "      <td>0</td>\n",
913
       "      <td>0</td>\n",
914
       "      <td>0</td>\n",
915
       "      <td>1</td>\n",
916
       "      <td>0</td>\n",
917
       "    </tr>\n",
918
       "    <tr>\n",
919
       "      <td>21929</td>\n",
920
       "      <td>0</td>\n",
921
       "      <td>0.0</td>\n",
922
       "      <td>10.0</td>\n",
923
       "      <td>0.0</td>\n",
924
       "      <td>6.0</td>\n",
925
       "      <td>3.0</td>\n",
926
       "      <td>0.0</td>\n",
927
       "      <td>5.0</td>\n",
928
       "      <td>18.0</td>\n",
929
       "      <td>0.0</td>\n",
930
       "      <td>...</td>\n",
931
       "      <td>0</td>\n",
932
       "      <td>1</td>\n",
933
       "      <td>0</td>\n",
934
       "      <td>0</td>\n",
935
       "      <td>0</td>\n",
936
       "      <td>0</td>\n",
937
       "      <td>0</td>\n",
938
       "      <td>0</td>\n",
939
       "      <td>0</td>\n",
940
       "      <td>1</td>\n",
941
       "    </tr>\n",
942
       "    <tr>\n",
943
       "      <td>57339</td>\n",
944
       "      <td>8</td>\n",
945
       "      <td>0.0</td>\n",
946
       "      <td>0.0</td>\n",
947
       "      <td>0.0</td>\n",
948
       "      <td>0.0</td>\n",
949
       "      <td>3.0</td>\n",
950
       "      <td>0.0</td>\n",
951
       "      <td>0.0</td>\n",
952
       "      <td>12.0</td>\n",
953
       "      <td>0.0</td>\n",
954
       "      <td>...</td>\n",
955
       "      <td>0</td>\n",
956
       "      <td>0</td>\n",
957
       "      <td>1</td>\n",
958
       "      <td>0</td>\n",
959
       "      <td>0</td>\n",
960
       "      <td>0</td>\n",
961
       "      <td>0</td>\n",
962
       "      <td>0</td>\n",
963
       "      <td>1</td>\n",
964
       "      <td>0</td>\n",
965
       "    </tr>\n",
966
       "  </tbody>\n",
967
       "</table>\n",
968
       "<p>5 rows × 27 columns</p>\n",
969
       "</div>"
970
      ],
971
      "text/plain": [
972
       "       admission   ud   bun  Bicarbonate  ventilation  Temp  Bilirubin  gcs  \\\n",
973
       "53545          8  0.0   0.0          0.0          0.0   3.0        0.0  0.0   \n",
974
       "51512          8  0.0   6.0          0.0          0.0   3.0        0.0  0.0   \n",
975
       "23837          8  0.0   0.0          0.0          0.0   3.0        0.0  0.0   \n",
976
       "21929          0  0.0  10.0          0.0          6.0   3.0        0.0  5.0   \n",
977
       "57339          8  0.0   0.0          0.0          0.0   3.0        0.0  0.0   \n",
978
       "\n",
979
       "        AGE   UO  ...  WBC_3.0  hr_0.0  hr_2.0  hr_4.0  hr_7.0  hr_11.0  \\\n",
980
       "53545  15.0  0.0  ...        0       1       0       0       0        0   \n",
981
       "51512  18.0  0.0  ...        0       0       1       0       0        0   \n",
982
       "23837  12.0  0.0  ...        0       0       1       0       0        0   \n",
983
       "21929  18.0  0.0  ...        0       1       0       0       0        0   \n",
984
       "57339  12.0  0.0  ...        0       0       1       0       0        0   \n",
985
       "\n",
986
       "       bp_0.0  bp_2.0  bp_5.0  bp_13.0  \n",
987
       "53545       1       0       0        0  \n",
988
       "51512       0       0       1        0  \n",
989
       "23837       0       0       1        0  \n",
990
       "21929       0       0       0        1  \n",
991
       "57339       0       0       1        0  \n",
992
       "\n",
993
       "[5 rows x 27 columns]"
994
      ]
995
     },
996
     "execution_count": 6,
997
     "metadata": {},
998
     "output_type": "execute_result"
999
    }
1000
   ],
1001
   "source": [
1002
    "X_train, X_test, y_train, y_test = format_data(saps_e)\n",
1003
    "X_train.head()\n"
1004
   ]
1005
  },
1006
  {
1007
   "cell_type": "code",
1008
   "execution_count": 7,
1009
   "metadata": {},
1010
   "outputs": [
1011
    {
1012
     "name": "stdout",
1013
     "output_type": "stream",
1014
     "text": [
1015
      "(42781, 27)\n",
1016
      "(18336, 27)\n"
1017
     ]
1018
    }
1019
   ],
1020
   "source": [
1021
    "print(X_train.shape)\n",
1022
    "print(X_test.shape)"
1023
   ]
1024
  },
1025
  {
1026
   "cell_type": "markdown",
1027
   "metadata": {},
1028
   "source": [
1029
    "### Standard Ensemble Machine Learning Models"
1030
   ]
1031
  },
1032
  {
1033
   "cell_type": "code",
1034
   "execution_count": 13,
1035
   "metadata": {},
1036
   "outputs": [],
1037
   "source": [
1038
    "# Evaluate several ml models by training on training set and testing on testing set\n",
1039
    "def evaluate(X_train, X_test, y_train, y_test):\n",
1040
    "    #Names of models\n",
1041
    "    model_name_list = ['Random Forest', 'Extra Trees',\n",
1042
    "                       'Gradient Boosted']\n",
1043
    "    model = np.arange(1, 4)\n",
1044
    "    train_accuracy = np.empty(len(model))\n",
1045
    "    test_accuracy = np.empty(len(model))\n",
1046
    "    # Instantiate the models\n",
1047
    "    model1 = RandomForestClassifier(n_estimators=100)\n",
1048
    "    model2 = ExtraTreesClassifier(n_estimators=100)\n",
1049
    "    model3 = GradientBoostingClassifier(n_estimators=100)\n",
1050
    "    results =pd.DataFrame(columns=['train_accuracy', 'test_accuracy'])    \n",
1051
    "    \n",
1052
    "    # Train and predict with each model\n",
1053
    "    for i, model in enumerate([model1, model2, model3]):\n",
1054
    "        model.fit(X_train, y_train)\n",
1055
    "        \n",
1056
    "        train_accuracy=model.score(X_train, y_train)\n",
1057
    "        test_accuracy=model.score(X_test, y_test)\n",
1058
    "        \n",
1059
    "        model_name=model_name_list[i]\n",
1060
    "        results.loc[model_name, :] =[train_accuracy, test_accuracy]\n",
1061
    "    return results\n",
1062
    "    "
1063
   ]
1064
  },
1065
  {
1066
   "cell_type": "code",
1067
   "execution_count": 14,
1068
   "metadata": {},
1069
   "outputs": [
1070
    {
1071
     "data": {
1072
      "application/javascript": [
1073
       "\n",
1074
       "        if (window._pyforest_update_imports_cell) { window._pyforest_update_imports_cell('import numpy as np\\nimport os\\nimport pandas as pd\\nfrom sklearn.model_selection import train_test_split\\nfrom sklearn.ensemble import RandomForestClassifier'); }\n",
1075
       "    "
1076
      ],
1077
      "text/plain": [
1078
       "<IPython.core.display.Javascript object>"
1079
      ]
1080
     },
1081
     "metadata": {},
1082
     "output_type": "display_data"
1083
    },
1084
    {
1085
     "data": {
1086
      "application/javascript": [
1087
       "\n",
1088
       "        if (window._pyforest_update_imports_cell) { window._pyforest_update_imports_cell('import numpy as np\\nimport os\\nimport pandas as pd\\nfrom sklearn.model_selection import train_test_split\\nfrom sklearn.ensemble import RandomForestClassifier'); }\n",
1089
       "    "
1090
      ],
1091
      "text/plain": [
1092
       "<IPython.core.display.Javascript object>"
1093
      ]
1094
     },
1095
     "metadata": {},
1096
     "output_type": "display_data"
1097
    },
1098
    {
1099
     "data": {
1100
      "application/javascript": [
1101
       "\n",
1102
       "        if (window._pyforest_update_imports_cell) { window._pyforest_update_imports_cell('import numpy as np\\nimport os\\nimport pandas as pd\\nfrom sklearn.model_selection import train_test_split\\nfrom sklearn.ensemble import RandomForestClassifier'); }\n",
1103
       "    "
1104
      ],
1105
      "text/plain": [
1106
       "<IPython.core.display.Javascript object>"
1107
      ]
1108
     },
1109
     "metadata": {},
1110
     "output_type": "display_data"
1111
    },
1112
    {
1113
     "data": {
1114
      "application/javascript": [
1115
       "\n",
1116
       "        if (window._pyforest_update_imports_cell) { window._pyforest_update_imports_cell('import numpy as np\\nimport os\\nimport pandas as pd\\nfrom sklearn.model_selection import train_test_split\\nfrom sklearn.ensemble import RandomForestClassifier'); }\n",
1117
       "    "
1118
      ],
1119
      "text/plain": [
1120
       "<IPython.core.display.Javascript object>"
1121
      ]
1122
     },
1123
     "metadata": {},
1124
     "output_type": "display_data"
1125
    },
1126
    {
1127
     "data": {
1128
      "text/html": [
1129
       "<div>\n",
1130
       "<style scoped>\n",
1131
       "    .dataframe tbody tr th:only-of-type {\n",
1132
       "        vertical-align: middle;\n",
1133
       "    }\n",
1134
       "\n",
1135
       "    .dataframe tbody tr th {\n",
1136
       "        vertical-align: top;\n",
1137
       "    }\n",
1138
       "\n",
1139
       "    .dataframe thead th {\n",
1140
       "        text-align: right;\n",
1141
       "    }\n",
1142
       "</style>\n",
1143
       "<table border=\"1\" class=\"dataframe\">\n",
1144
       "  <thead>\n",
1145
       "    <tr style=\"text-align: right;\">\n",
1146
       "      <th></th>\n",
1147
       "      <th>train_accuracy</th>\n",
1148
       "      <th>test_accuracy</th>\n",
1149
       "    </tr>\n",
1150
       "  </thead>\n",
1151
       "  <tbody>\n",
1152
       "    <tr>\n",
1153
       "      <td>Random Forest</td>\n",
1154
       "      <td>0.949323</td>\n",
1155
       "      <td>0.91983</td>\n",
1156
       "    </tr>\n",
1157
       "    <tr>\n",
1158
       "      <td>Extra Trees</td>\n",
1159
       "      <td>0.949323</td>\n",
1160
       "      <td>0.919339</td>\n",
1161
       "    </tr>\n",
1162
       "    <tr>\n",
1163
       "      <td>Gradient Boosted</td>\n",
1164
       "      <td>0.924149</td>\n",
1165
       "      <td>0.925993</td>\n",
1166
       "    </tr>\n",
1167
       "  </tbody>\n",
1168
       "</table>\n",
1169
       "</div>"
1170
      ],
1171
      "text/plain": [
1172
       "                 train_accuracy test_accuracy\n",
1173
       "Random Forest          0.949323       0.91983\n",
1174
       "Extra Trees            0.949323      0.919339\n",
1175
       "Gradient Boosted       0.924149      0.925993"
1176
      ]
1177
     },
1178
     "execution_count": 14,
1179
     "metadata": {},
1180
     "output_type": "execute_result"
1181
    }
1182
   ],
1183
   "source": [
1184
    "results = evaluate(X_train, X_test, y_train, y_test)\n",
1185
    "results"
1186
   ]
1187
  },
1188
  {
1189
   "cell_type": "code",
1190
   "execution_count": null,
1191
   "metadata": {},
1192
   "outputs": [],
1193
   "source": []
1194
  }
1195
 ],
1196
 "metadata": {
1197
  "kernelspec": {
1198
   "display_name": "Python 3",
1199
   "language": "python",
1200
   "name": "python3"
1201
  },
1202
  "language_info": {
1203
   "codemirror_mode": {
1204
    "name": "ipython",
1205
    "version": 3
1206
   },
1207
   "file_extension": ".py",
1208
   "mimetype": "text/x-python",
1209
   "name": "python",
1210
   "nbconvert_exporter": "python",
1211
   "pygments_lexer": "ipython3",
1212
   "version": "3.7.4"
1213
  }
1214
 },
1215
 "nbformat": 4,
1216
 "nbformat_minor": 1
1217
}