a b/Serialized/prepare_ensembling.ipynb
1
{
2
 "cells": [
3
  {
4
   "cell_type": "code",
5
   "execution_count": 1,
6
   "metadata": {},
7
   "outputs": [
8
    {
9
     "name": "stderr",
10
     "output_type": "stream",
11
     "text": [
12
      "/home/reina/anaconda3/envs/RSNA/lib/python3.6/importlib/_bootstrap.py:219: ImportWarning: can't resolve package from __spec__ or __package__, falling back on __name__ and __path__\n",
13
      "  return f(*args, **kwds)\n",
14
      "/home/reina/anaconda3/envs/RSNA/lib/python3.6/importlib/_bootstrap.py:219: ImportWarning: can't resolve package from __spec__ or __package__, falling back on __name__ and __path__\n",
15
      "  return f(*args, **kwds)\n"
16
     ]
17
    }
18
   ],
19
   "source": [
20
    "from __future__ import absolute_import\n",
21
    "from __future__ import division\n",
22
    "from __future__ import print_function\n",
23
    "\n",
24
    "\n",
25
    "import numpy as np # linear algebra\n",
26
    "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n",
27
    "import os\n",
28
    "import datetime\n",
29
    "import seaborn as sns\n",
30
    "\n",
31
    "#import pydicom\n",
32
    "import time\n",
33
    "from functools import partial\n",
34
    "import gc\n",
35
    "import operator \n",
36
    "import matplotlib.pyplot as plt\n",
37
    "import torch\n",
38
    "import torch.nn as nn\n",
39
    "import torch.utils.data as D\n",
40
    "import torch.nn.functional as F\n",
41
    "from sklearn.model_selection import KFold\n",
42
    "from tqdm import tqdm, tqdm_notebook\n",
43
    "from IPython.core.interactiveshell import InteractiveShell\n",
44
    "InteractiveShell.ast_node_interactivity = \"all\"\n",
45
    "import warnings\n",
46
    "warnings.filterwarnings(action='once')\n",
47
    "import pickle\n",
48
    "%load_ext autoreload\n",
49
    "%autoreload 2\n",
50
    "%matplotlib inline\n",
51
    "from skimage.io import imread,imshow\n",
52
    "from helper import *\n",
53
    "import helper\n",
54
    "import torchvision.models as models\n",
55
    "from torch.optim import Adam\n",
56
    "from defenitions import *"
57
   ]
58
  },
59
  {
60
   "cell_type": "code",
61
   "execution_count": 2,
62
   "metadata": {},
63
   "outputs": [],
64
   "source": [
65
    "# Here you should set which model parameters you want to use (see defenitions.py) and which GPU to use.\n",
66
    "\n",
67
    "device=device_by_name(\"Tesla\") # RTX , cpu\n",
68
    "torch.cuda.set_device(device)"
69
   ]
70
  },
71
  {
72
   "cell_type": "code",
73
   "execution_count": 3,
74
   "metadata": {},
75
   "outputs": [
76
    {
77
     "data": {
78
      "text/plain": [
79
       "(674252, 15)"
80
      ]
81
     },
82
     "execution_count": 3,
83
     "metadata": {},
84
     "output_type": "execute_result"
85
    },
86
    {
87
     "data": {
88
      "text/plain": [
89
       "(674252, 15)"
90
      ]
91
     },
92
     "execution_count": 3,
93
     "metadata": {},
94
     "output_type": "execute_result"
95
    },
96
    {
97
     "data": {
98
      "text/html": [
99
       "<div>\n",
100
       "<style scoped>\n",
101
       "    .dataframe tbody tr th:only-of-type {\n",
102
       "        vertical-align: middle;\n",
103
       "    }\n",
104
       "\n",
105
       "    .dataframe tbody tr th {\n",
106
       "        vertical-align: top;\n",
107
       "    }\n",
108
       "\n",
109
       "    .dataframe thead th {\n",
110
       "        text-align: right;\n",
111
       "    }\n",
112
       "</style>\n",
113
       "<table border=\"1\" class=\"dataframe\">\n",
114
       "  <thead>\n",
115
       "    <tr style=\"text-align: right;\">\n",
116
       "      <th></th>\n",
117
       "      <th>PatientID</th>\n",
118
       "      <th>epidural</th>\n",
119
       "      <th>intraparenchymal</th>\n",
120
       "      <th>intraventricular</th>\n",
121
       "      <th>subarachnoid</th>\n",
122
       "      <th>subdural</th>\n",
123
       "      <th>any</th>\n",
124
       "      <th>PID</th>\n",
125
       "      <th>StudyI</th>\n",
126
       "      <th>SeriesI</th>\n",
127
       "      <th>WindowCenter</th>\n",
128
       "      <th>WindowWidth</th>\n",
129
       "      <th>ImagePositionZ</th>\n",
130
       "      <th>ImagePositionX</th>\n",
131
       "      <th>ImagePositionY</th>\n",
132
       "    </tr>\n",
133
       "  </thead>\n",
134
       "  <tbody>\n",
135
       "    <tr>\n",
136
       "      <th>0</th>\n",
137
       "      <td>63eb1e259</td>\n",
138
       "      <td>0</td>\n",
139
       "      <td>0</td>\n",
140
       "      <td>0</td>\n",
141
       "      <td>0</td>\n",
142
       "      <td>0</td>\n",
143
       "      <td>0</td>\n",
144
       "      <td>a449357f</td>\n",
145
       "      <td>62d125e5b2</td>\n",
146
       "      <td>0be5c0d1b3</td>\n",
147
       "      <td>['00036', '00036']</td>\n",
148
       "      <td>['00080', '00080']</td>\n",
149
       "      <td>180.199951</td>\n",
150
       "      <td>-125.0</td>\n",
151
       "      <td>-8.000000</td>\n",
152
       "    </tr>\n",
153
       "    <tr>\n",
154
       "      <th>1</th>\n",
155
       "      <td>2669954a7</td>\n",
156
       "      <td>0</td>\n",
157
       "      <td>0</td>\n",
158
       "      <td>0</td>\n",
159
       "      <td>0</td>\n",
160
       "      <td>0</td>\n",
161
       "      <td>0</td>\n",
162
       "      <td>363d5865</td>\n",
163
       "      <td>a20b80c7bf</td>\n",
164
       "      <td>3564d584db</td>\n",
165
       "      <td>['00047', '00047']</td>\n",
166
       "      <td>['00080', '00080']</td>\n",
167
       "      <td>922.530821</td>\n",
168
       "      <td>-156.0</td>\n",
169
       "      <td>45.572849</td>\n",
170
       "    </tr>\n",
171
       "    <tr>\n",
172
       "      <th>2</th>\n",
173
       "      <td>52c9913b1</td>\n",
174
       "      <td>0</td>\n",
175
       "      <td>0</td>\n",
176
       "      <td>0</td>\n",
177
       "      <td>0</td>\n",
178
       "      <td>0</td>\n",
179
       "      <td>0</td>\n",
180
       "      <td>9c2b4bd7</td>\n",
181
       "      <td>3e3634f8cf</td>\n",
182
       "      <td>973274ffc9</td>\n",
183
       "      <td>40</td>\n",
184
       "      <td>150</td>\n",
185
       "      <td>4.455000</td>\n",
186
       "      <td>-125.0</td>\n",
187
       "      <td>-115.063000</td>\n",
188
       "    </tr>\n",
189
       "    <tr>\n",
190
       "      <th>3</th>\n",
191
       "      <td>4e6ff6126</td>\n",
192
       "      <td>0</td>\n",
193
       "      <td>0</td>\n",
194
       "      <td>0</td>\n",
195
       "      <td>0</td>\n",
196
       "      <td>0</td>\n",
197
       "      <td>0</td>\n",
198
       "      <td>3ae81c2d</td>\n",
199
       "      <td>a1390c15c2</td>\n",
200
       "      <td>e5ccad8244</td>\n",
201
       "      <td>['00036', '00036']</td>\n",
202
       "      <td>['00080', '00080']</td>\n",
203
       "      <td>100.000000</td>\n",
204
       "      <td>-99.5</td>\n",
205
       "      <td>28.500000</td>\n",
206
       "    </tr>\n",
207
       "    <tr>\n",
208
       "      <th>4</th>\n",
209
       "      <td>7858edd88</td>\n",
210
       "      <td>0</td>\n",
211
       "      <td>0</td>\n",
212
       "      <td>0</td>\n",
213
       "      <td>0</td>\n",
214
       "      <td>0</td>\n",
215
       "      <td>0</td>\n",
216
       "      <td>c1867feb</td>\n",
217
       "      <td>c73e81ed3a</td>\n",
218
       "      <td>28e0531b3a</td>\n",
219
       "      <td>40</td>\n",
220
       "      <td>100</td>\n",
221
       "      <td>145.793000</td>\n",
222
       "      <td>-125.0</td>\n",
223
       "      <td>-132.190000</td>\n",
224
       "    </tr>\n",
225
       "  </tbody>\n",
226
       "</table>\n",
227
       "</div>"
228
      ],
229
      "text/plain": [
230
       "   PatientID  epidural  intraparenchymal  intraventricular  subarachnoid  \\\n",
231
       "0  63eb1e259         0                 0                 0             0   \n",
232
       "1  2669954a7         0                 0                 0             0   \n",
233
       "2  52c9913b1         0                 0                 0             0   \n",
234
       "3  4e6ff6126         0                 0                 0             0   \n",
235
       "4  7858edd88         0                 0                 0             0   \n",
236
       "\n",
237
       "   subdural  any       PID      StudyI     SeriesI        WindowCenter  \\\n",
238
       "0         0    0  a449357f  62d125e5b2  0be5c0d1b3  ['00036', '00036']   \n",
239
       "1         0    0  363d5865  a20b80c7bf  3564d584db  ['00047', '00047']   \n",
240
       "2         0    0  9c2b4bd7  3e3634f8cf  973274ffc9                  40   \n",
241
       "3         0    0  3ae81c2d  a1390c15c2  e5ccad8244  ['00036', '00036']   \n",
242
       "4         0    0  c1867feb  c73e81ed3a  28e0531b3a                  40   \n",
243
       "\n",
244
       "          WindowWidth  ImagePositionZ  ImagePositionX  ImagePositionY  \n",
245
       "0  ['00080', '00080']      180.199951          -125.0       -8.000000  \n",
246
       "1  ['00080', '00080']      922.530821          -156.0       45.572849  \n",
247
       "2                 150        4.455000          -125.0     -115.063000  \n",
248
       "3  ['00080', '00080']      100.000000           -99.5       28.500000  \n",
249
       "4                 100      145.793000          -125.0     -132.190000  "
250
      ]
251
     },
252
     "execution_count": 3,
253
     "metadata": {},
254
     "output_type": "execute_result"
255
    }
256
   ],
257
   "source": [
258
    "train_df = pd.read_csv(data_dir+'train.csv')\n",
259
    "train_df.shape\n",
260
    "train_df=train_df[~train_df.PatientID.isin(bad_images)].reset_index(drop=True)\n",
261
    "train_df=train_df.drop_duplicates().reset_index(drop=True)\n",
262
    "train_df.shape\n",
263
    "train_df.head()"
264
   ]
265
  },
266
  {
267
   "cell_type": "code",
268
   "execution_count": 12,
269
   "metadata": {},
270
   "outputs": [
271
    {
272
     "data": {
273
      "text/html": [
274
       "<div>\n",
275
       "<style scoped>\n",
276
       "    .dataframe tbody tr th:only-of-type {\n",
277
       "        vertical-align: middle;\n",
278
       "    }\n",
279
       "\n",
280
       "    .dataframe tbody tr th {\n",
281
       "        vertical-align: top;\n",
282
       "    }\n",
283
       "\n",
284
       "    .dataframe thead th {\n",
285
       "        text-align: right;\n",
286
       "    }\n",
287
       "</style>\n",
288
       "<table border=\"1\" class=\"dataframe\">\n",
289
       "  <thead>\n",
290
       "    <tr style=\"text-align: right;\">\n",
291
       "      <th></th>\n",
292
       "      <th>PatientID</th>\n",
293
       "      <th>epidural</th>\n",
294
       "      <th>intraparenchymal</th>\n",
295
       "      <th>intraventricular</th>\n",
296
       "      <th>subarachnoid</th>\n",
297
       "      <th>subdural</th>\n",
298
       "      <th>any</th>\n",
299
       "      <th>SeriesI</th>\n",
300
       "      <th>PID</th>\n",
301
       "      <th>StudyI</th>\n",
302
       "      <th>WindowCenter</th>\n",
303
       "      <th>WindowWidth</th>\n",
304
       "      <th>ImagePositionZ</th>\n",
305
       "      <th>ImagePositionX</th>\n",
306
       "      <th>ImagePositionY</th>\n",
307
       "    </tr>\n",
308
       "  </thead>\n",
309
       "  <tbody>\n",
310
       "    <tr>\n",
311
       "      <th>0</th>\n",
312
       "      <td>28fbab7eb</td>\n",
313
       "      <td>0.5</td>\n",
314
       "      <td>0.5</td>\n",
315
       "      <td>0.5</td>\n",
316
       "      <td>0.5</td>\n",
317
       "      <td>0.5</td>\n",
318
       "      <td>0.5</td>\n",
319
       "      <td>ebfd7e4506</td>\n",
320
       "      <td>cf1b6b11</td>\n",
321
       "      <td>93407cadbb</td>\n",
322
       "      <td>30</td>\n",
323
       "      <td>80</td>\n",
324
       "      <td>158.458000</td>\n",
325
       "      <td>-125.0</td>\n",
326
       "      <td>-135.598000</td>\n",
327
       "    </tr>\n",
328
       "    <tr>\n",
329
       "      <th>1</th>\n",
330
       "      <td>877923b8b</td>\n",
331
       "      <td>0.5</td>\n",
332
       "      <td>0.5</td>\n",
333
       "      <td>0.5</td>\n",
334
       "      <td>0.5</td>\n",
335
       "      <td>0.5</td>\n",
336
       "      <td>0.5</td>\n",
337
       "      <td>6d95084e15</td>\n",
338
       "      <td>ad8ea58f</td>\n",
339
       "      <td>a337baa067</td>\n",
340
       "      <td>30</td>\n",
341
       "      <td>80</td>\n",
342
       "      <td>138.729050</td>\n",
343
       "      <td>-125.0</td>\n",
344
       "      <td>-101.797981</td>\n",
345
       "    </tr>\n",
346
       "    <tr>\n",
347
       "      <th>2</th>\n",
348
       "      <td>a591477cb</td>\n",
349
       "      <td>0.5</td>\n",
350
       "      <td>0.5</td>\n",
351
       "      <td>0.5</td>\n",
352
       "      <td>0.5</td>\n",
353
       "      <td>0.5</td>\n",
354
       "      <td>0.5</td>\n",
355
       "      <td>8e06b2c9e0</td>\n",
356
       "      <td>ecfb278b</td>\n",
357
       "      <td>0cfe838d54</td>\n",
358
       "      <td>30</td>\n",
359
       "      <td>80</td>\n",
360
       "      <td>60.830002</td>\n",
361
       "      <td>-125.0</td>\n",
362
       "      <td>-133.300003</td>\n",
363
       "    </tr>\n",
364
       "    <tr>\n",
365
       "      <th>3</th>\n",
366
       "      <td>42217c898</td>\n",
367
       "      <td>0.5</td>\n",
368
       "      <td>0.5</td>\n",
369
       "      <td>0.5</td>\n",
370
       "      <td>0.5</td>\n",
371
       "      <td>0.5</td>\n",
372
       "      <td>0.5</td>\n",
373
       "      <td>e800f419cf</td>\n",
374
       "      <td>e96e31f4</td>\n",
375
       "      <td>c497ac5bad</td>\n",
376
       "      <td>30</td>\n",
377
       "      <td>80</td>\n",
378
       "      <td>55.388000</td>\n",
379
       "      <td>-125.0</td>\n",
380
       "      <td>-146.081000</td>\n",
381
       "    </tr>\n",
382
       "    <tr>\n",
383
       "      <th>4</th>\n",
384
       "      <td>a130c4d2f</td>\n",
385
       "      <td>0.5</td>\n",
386
       "      <td>0.5</td>\n",
387
       "      <td>0.5</td>\n",
388
       "      <td>0.5</td>\n",
389
       "      <td>0.5</td>\n",
390
       "      <td>0.5</td>\n",
391
       "      <td>faeb7454f3</td>\n",
392
       "      <td>69affa42</td>\n",
393
       "      <td>854e4fbc01</td>\n",
394
       "      <td>30</td>\n",
395
       "      <td>80</td>\n",
396
       "      <td>33.516888</td>\n",
397
       "      <td>-125.0</td>\n",
398
       "      <td>-118.689819</td>\n",
399
       "    </tr>\n",
400
       "  </tbody>\n",
401
       "</table>\n",
402
       "</div>"
403
      ],
404
      "text/plain": [
405
       "   PatientID  epidural  intraparenchymal  intraventricular  subarachnoid  \\\n",
406
       "0  28fbab7eb       0.5               0.5               0.5           0.5   \n",
407
       "1  877923b8b       0.5               0.5               0.5           0.5   \n",
408
       "2  a591477cb       0.5               0.5               0.5           0.5   \n",
409
       "3  42217c898       0.5               0.5               0.5           0.5   \n",
410
       "4  a130c4d2f       0.5               0.5               0.5           0.5   \n",
411
       "\n",
412
       "   subdural  any     SeriesI       PID      StudyI WindowCenter WindowWidth  \\\n",
413
       "0       0.5  0.5  ebfd7e4506  cf1b6b11  93407cadbb           30          80   \n",
414
       "1       0.5  0.5  6d95084e15  ad8ea58f  a337baa067           30          80   \n",
415
       "2       0.5  0.5  8e06b2c9e0  ecfb278b  0cfe838d54           30          80   \n",
416
       "3       0.5  0.5  e800f419cf  e96e31f4  c497ac5bad           30          80   \n",
417
       "4       0.5  0.5  faeb7454f3  69affa42  854e4fbc01           30          80   \n",
418
       "\n",
419
       "   ImagePositionZ  ImagePositionX  ImagePositionY  \n",
420
       "0      158.458000          -125.0     -135.598000  \n",
421
       "1      138.729050          -125.0     -101.797981  \n",
422
       "2       60.830002          -125.0     -133.300003  \n",
423
       "3       55.388000          -125.0     -146.081000  \n",
424
       "4       33.516888          -125.0     -118.689819  "
425
      ]
426
     },
427
     "execution_count": 12,
428
     "metadata": {},
429
     "output_type": "execute_result"
430
    }
431
   ],
432
   "source": [
433
    "test_df = pd.read_csv(data_dir+'test.csv')\n",
434
    "test_df.head()"
435
   ]
436
  },
437
  {
438
   "cell_type": "code",
439
   "execution_count": 5,
440
   "metadata": {},
441
   "outputs": [],
442
   "source": [
443
    "def my_loss(y_pred,y_true,weights):  # weighted BCE-with-logits, normalised by the share of entries whose target is >= 0\n",
444
    "    valid=(y_true>=0).to(torch.float)  # 1.0 where the target is >= 0, 0.0 elsewhere\n",
445
    "    bce=F.binary_cross_entropy_with_logits(y_pred,y_true,reduction='none')\n",
446
    "    return (bce*valid*weights.expand_as(y_true)).mean()/(valid.mean()+1e-7)"
447
   ]
448
  },
449
  {
450
   "cell_type": "code",
451
   "execution_count": 6,
452
   "metadata": {},
453
   "outputs": [],
454
   "source": [
455
    "class Metric():  # running tracker for the masked, weighted BCE-with-logits loss\n",
456
    "    def __init__(self,weights,k=0.03):\n",
457
    "        self.weights=weights  # expanded to the shape of y_true inside calc\n",
458
    "        self.k=k  # weight given to the newest batch in the moving average lossf\n",
459
    "        self.zero()\n",
460
    "    # reset the moving average and the running totals\n",
461
    "    def zero(self):\n",
462
    "        self.loss_sum=0.\n",
463
    "        self.loss_count=0.\n",
464
    "        self.lossf=0.\n",
465
    "    # update the statistics with one batch and return the moving average under prefix+'mloss'\n",
466
    "    def calc(self,y_pred,y_true,prefix=\"\"):\n",
467
    "        valid=(y_true>=0).to(torch.float)  # entries with a negative target are masked out\n",
468
    "        bce=F.binary_cross_entropy_with_logits(y_pred,y_true,reduction='none')\n",
469
    "        loss=(bce*valid*self.weights.expand_as(y_true)).mean()/(valid.mean()+1e-5)\n",
470
    "        self.lossf=self.lossf*(1-self.k)+loss*self.k\n",
471
    "        self.loss_sum=self.loss_sum+loss*valid.sum()\n",
472
    "        self.loss_count=self.loss_count+valid.sum()\n",
473
    "        return {prefix+'mloss':self.lossf}\n",
474
    "    # accumulated loss divided by the accumulated number of unmasked entries, under prefix+'mloss_tot'\n",
475
    "    def calc_sums(self,prefix=\"\"):\n",
476
    "        return {prefix+'mloss_tot':self.loss_sum/self.loss_count}\n"
477
   ]
478
  },
479
  {
480
   "cell_type": "code",
481
   "execution_count": 7,
482
   "metadata": {},
483
   "outputs": [],
484
   "source": [
485
    "# Unpack every configuration stored in `parameters` into parallel lists;\n",
486
    "# position i of each list describes the same configuration.\n",
487
    "model_names,types_train,types_test=[],[],[]\n",
488
    "versions,num_splits,seeds=[],[],[]\n",
489
    "for key in parameters:\n",
490
    "    cfg=parameters[key]\n",
491
    "    model_names.append(cfg['model_name'])\n",
492
    "    types_train.append(cfg['train_features'])\n",
493
    "    types_test.append(cfg['test_features'])\n",
494
    "    versions.append(cfg['version'])\n",
495
    "    num_splits.append(cfg['n_splits'])\n",
496
    "    seeds.append(cfg['SEED'])\n",
497
    "# SEED is collected here but not read by the loops in the visible cells"
498
   ]
499
  },
500
  {
501
   "cell_type": "code",
502
   "execution_count": null,
503
   "metadata": {},
504
   "outputs": [],
505
   "source": [
506
    "multi=3  # passed to FullHeadDataset(multi=...) and embedded in the checkpoint version suffix\n",
507
    "for model_name,type_,version_,n,SEED in zip(model_names,types_train,versions,num_splits,seeds):\n",
508
    "    # fold assignment for this model's own split count n; it is the same for every fold, so it is loaded once per model\n",
509
    "    with open(outputs_dir+'PID_splits_{}.pkl'.format(n),'rb') as pickle_file:\n",
510
    "        split_sid,splits=pickle.load(pickle_file)\n",
511
    "    for num_split in tqdm_notebook(range(n)):\n",
512
    "        pred_list=[]\n",
513
    "        print(model_name,version_,type_,num_split) \n",
514
    "        with open(outputs_dir+outputs_format.format(model_name,version_,type_,num_split),'rb') as pickle_file:\n",
515
    "            features=pickle.load(pickle_file)\n",
516
    "        # view the flat feature rows as groups of 4 rows each\n",
517
    "        features=features.reshape(features.shape[0]//4,4,-1)\n",
518
    "        split_validate =  train_df[train_df.PID.isin(set(split_sid[splits[num_split][1]]))].SeriesI.unique()\n",
519
    "        model=ResModelPool(features.shape[-1])\n",
520
    "        version=version_+'_fullhead_resmodel_pool2_{}'.format(multi)\n",
521
    "        # weights are read from models_format.format(model_name,version,num_split) and mapped onto the selected device\n",
522
    "        model.load_state_dict(torch.load(models_dir+models_format.format(model_name,version,num_split),map_location=torch.device(device)))\n",
523
    "        # dataset handed to model_run for the predictions\n",
524
    "        valid_dataset=FullHeadDataset(train_df,\n",
525
    "                                      split_validate,\n",
526
    "                                      features,\n",
527
    "                                      'SeriesI',\n",
528
    "                                      'ImagePositionZ',\n",
529
    "                                      multi=multi)\n",
530
    "        # second dataset over the same series, built with target_columns; only its second item is read below\n",
531
    "        win_dataset=FullHeadDataset(train_df,\n",
532
    "                                      split_validate,\n",
533
    "                                      features,\n",
534
    "                                      'SeriesI',\n",
535
    "                                      'ImagePositionZ',\n",
536
    "                                       target_columns=hemorrhage_types)\n",
537
    "        win_list=[]\n",
538
    "        dl = D.DataLoader(win_dataset,batch_size=128,num_workers=16)\n",
539
    "        for _,win in tqdm_notebook(dl):\n",
540
    "            win_list.append(win.reshape(win.shape[0]*win.shape[1],-1))    \n",
541
    "        wins = torch.cat(win_list,0).sum(1)>=0  # rows with a non-negative sum are kept (presumably the real, non-padded slices - confirm)\n",
542
    "        wins.sum()\n",
543
    "        for i in tqdm_notebook(range(32),leave=False):  # 32 passes; presumably each pass is augmented differently - confirm in model_run/FullHeadDataset\n",
544
    "            pr = model_run(model,valid_dataset,do_apex=False,batch_size=128)\n",
545
    "            pred_list.append(pr.reshape(pr.shape[0]*pr.shape[1],-1)[wins])\n",
546
    "        # the 32 masked prediction arrays of this fold are stored together in one pickle\n",
547
    "        with open(outputs_dir+outputs_format.format(model_name,version,'OOF_pred',num_split),'wb') as pickle_file:\n",
548
    "            pickle.dump(pred_list,pickle_file,protocol=4)\n"
549
   ]
550
  },
551
  {
552
   "cell_type": "code",
553
   "execution_count": null,
554
   "metadata": {},
555
   "outputs": [],
556
   "source": [
557
    "multi=3  # in this cell only used for the checkpoint version suffix\n",
558
    "for model_name,type_,version_,n,SEED in zip(model_names,types_test,versions,num_splits,seeds):\n",
559
    "    for num_split in tqdm_notebook(range(n)):\n",
560
    "        pred_list=[]\n",
561
    "        print(model_name,version_,type_,num_split) \n",
562
    "        with open(outputs_dir+outputs_format.format(model_name,version_,type_,num_split),'rb') as pickle_file:\n",
563
    "            features=pickle.load(pickle_file)\n",
564
    "        # view the flat feature rows as groups of 8 rows each\n",
565
    "        features=features.reshape(features.shape[0]//8,8,-1)\n",
566
    "        print(features.shape)\n",
567
    "        model=ResModelPool(features.shape[-1])\n",
568
    "        version=version_+'_fullhead_resmodel_pool2_{}'.format(multi)\n",
569
    "        # weights are read from models_format.format(model_name,version,num_split) and mapped onto the selected device\n",
570
    "        model.load_state_dict(torch.load(models_dir+models_format.format(model_name,version,num_split),map_location=torch.device(device)))\n",
571
    "        # NOTE(review): the dataset below uses multi=4 while the checkpoint suffix above uses multi=3 - confirm this is intended\n",
572
    "        valid_dataset=FullHeadDataset(test_df,\n",
573
    "                                      test_df.SeriesI.unique(),\n",
574
    "                                      features,\n",
575
    "                                      'SeriesI',\n",
576
    "                                      'ImagePositionZ',\n",
577
    "                                      multi =4)\n",
578
    "        # second dataset over the same series, built with target_columns; only its second item is read below\n",
579
    "        win_dataset=FullHeadDataset(test_df,\n",
580
    "                                      test_df.SeriesI.unique(),\n",
581
    "                                      features,\n",
582
    "                                      'SeriesI',\n",
583
    "                                      'ImagePositionZ',\n",
584
    "                                       target_columns=hemorrhage_types)\n",
585
    "        win_list=[]\n",
586
    "        dl = D.DataLoader(win_dataset,batch_size=128,num_workers=16)\n",
587
    "        for _,win in tqdm_notebook(dl):\n",
588
    "            win_list.append(win.reshape(win.shape[0]*win.shape[1],-1))    \n",
589
    "        wins = torch.cat(win_list,0).sum(1)>=0  # rows with a non-negative sum are kept (presumably the real, non-padded slices - confirm)\n",
590
    "        wins.sum()\n",
591
    "        for i in tqdm_notebook(range(32),leave=False):  # 32 passes; presumably each pass is augmented differently - confirm in model_run/FullHeadDataset\n",
592
    "            pr = model_run(model,valid_dataset,do_apex=False,batch_size=128)\n",
593
    "            pred_list.append(pr.reshape(pr.shape[0]*pr.shape[1],-1)[wins])\n",
594
    "        # the 32 masked prediction arrays of this fold are stored together in one pickle\n",
595
    "        with open(outputs_dir+outputs_format.format(model_name,version,'test_pred_ensemble',num_split),'wb') as pickle_file:\n",
596
    "            pickle.dump(pred_list,pickle_file,protocol=4)\n"
597
   ]
598
  },
599
  {
600
   "cell_type": "code",
601
   "execution_count": null,
602
   "metadata": {},
603
   "outputs": [],
604
   "source": [
605
    "OOF_ids={}\n",
606
    "SEED=8153  # not read in this cell\n",
607
    "n_splits=3\n",
608
    "with open(outputs_dir+'PID_splits_{}.pkl'.format(n_splits),'rb') as pickle_file:\n",
609
    "    split_sid,splits=pickle.load(pickle_file)\n",
610
    "# the column arrays do not change between folds, so they are taken from train_df once\n",
611
    "image_arr=train_df.PatientID.values\n",
612
    "ref_arr=train_df.SeriesI.values\n",
613
    "order_arr=train_df.ImagePositionZ.values\n",
614
    "for i in range( n_splits):\n",
615
    "    images_id_list=[]\n",
616
    "    fold_rows=train_df[train_df.PID.isin(set(split_sid[splits[i][1]]))]  # rows of fold i, selected through splits[i][1]\n",
617
    "    split_validate=fold_rows.SeriesI.unique()\n",
618
    "    for s in tqdm_notebook(split_validate):\n",
619
    "        head_idx = np.where(ref_arr==s)[0]\n",
620
    "        sorted_head_idx=head_idx[np.argsort(order_arr[head_idx])]  # rows of one series, ordered by ImagePositionZ\n",
621
    "        images_id_list.append(image_arr[sorted_head_idx])\n",
622
    "    image_ids=np.concatenate(images_id_list)\n",
623
    "    print(image_ids.shape,fold_rows.shape[0])  # id count next to the fold's row count\n",
624
    "    OOF_ids[i]=image_ids\n",
625
    "\n",
626
    "# persist {fold index: image ids in series order} for this split count\n",
627
    "with open(outputs_dir+'OOF_validation_image_ids_{}.pkl'.format(n_splits),'wb') as pickle_file:\n",
628
    "    pickle.dump(OOF_ids,pickle_file,protocol=4)\n"
629
   ]
630
  },
631
  {
632
   "cell_type": "code",
633
   "execution_count": null,
634
   "metadata": {},
635
   "outputs": [],
636
   "source": [
637
    "OOF_ids={}\n",
638
    "SEED=432  # not read in this cell\n",
639
    "n_splits=5\n",
640
    "with open(outputs_dir+'PID_splits_{}.pkl'.format(n_splits),'rb') as pickle_file:\n",
641
    "    split_sid,splits=pickle.load(pickle_file)\n",
642
    "# the column arrays do not change between folds, so they are taken from train_df once\n",
643
    "image_arr=train_df.PatientID.values\n",
644
    "ref_arr=train_df.SeriesI.values\n",
645
    "order_arr=train_df.ImagePositionZ.values\n",
646
    "for i in range( n_splits):\n",
647
    "    images_id_list=[]\n",
648
    "    fold_rows=train_df[train_df.PID.isin(set(split_sid[splits[i][1]]))]  # rows of fold i, selected through splits[i][1]\n",
649
    "    split_validate=fold_rows.SeriesI.unique()\n",
650
    "    for s in tqdm_notebook(split_validate):\n",
651
    "        head_idx = np.where(ref_arr==s)[0]\n",
652
    "        sorted_head_idx=head_idx[np.argsort(order_arr[head_idx])]  # rows of one series, ordered by ImagePositionZ\n",
653
    "        images_id_list.append(image_arr[sorted_head_idx])\n",
654
    "    image_ids=np.concatenate(images_id_list)\n",
655
    "    print(image_ids.shape,fold_rows.shape[0])  # id count next to the fold's row count\n",
656
    "    OOF_ids[i]=image_ids\n",
657
    "\n",
658
    "# persist {fold index: image ids in series order} for this split count\n",
659
    "with open(outputs_dir+'OOF_validation_image_ids_{}.pkl'.format(n_splits),'wb') as pickle_file:\n",
660
    "    pickle.dump(OOF_ids,pickle_file,protocol=4)\n"
661
   ]
662
  },
663
  {
664
   "cell_type": "code",
665
   "execution_count": 14,
666
   "metadata": {},
667
   "outputs": [
668
    {
669
     "data": {
670
      "application/vnd.jupyter.widget-view+json": {
671
       "model_id": "05087bea7c1b4380a9c6bd7ff81eb4bd",
672
       "version_major": 2,
673
       "version_minor": 0
674
      },
675
      "text/plain": [
676
       "HBox(children=(IntProgress(value=0, max=2214), HTML(value='')))"
677
      ]
678
     },
679
     "metadata": {},
680
     "output_type": "display_data"
681
    }
682
   ],
683
   "source": [
684
    "# Collect the test image ids series by series, each series ordered by ImagePositionZ, and store them.\n",
685
    "images_id_list=[]\n",
686
    "dummeys=[]\n",
687
    "image_arr=test_df.PatientID.values\n",
688
    "ref_arr=test_df.SeriesI.values\n",
689
    "order_arr=test_df.ImagePositionZ.values\n",
690
    "for s in tqdm_notebook(test_df.SeriesI.unique()):\n",
691
    "    dumm=np.zeros(60)  # fixed-length mask per series; 60 is presumably the maximum slice count - confirm\n",
692
    "    head_idx = np.where(ref_arr==s)[0]\n",
693
    "    sorted_head_idx=head_idx[np.argsort(order_arr[head_idx])]\n",
694
    "    images_id_list.append(image_arr[sorted_head_idx])\n",
695
    "    dumm[0:head_idx.shape[0]]=1  # NOTE(review): numpy clips the slice, so a series longer than 60 would not be fully marked\n",
696
    "    dummeys.append(dumm)\n",
697
    "image_ids=np.concatenate(images_id_list)\n",
698
    "select=np.concatenate(dummeys)==1  # not read again in the visible cells\n",
699
    "\n",
700
    "with open(outputs_dir+'ensemble_test_image_ids.pkl','wb') as pickle_file:\n",
701
    "    pickle.dump(image_ids,pickle_file,protocol=4)\n"
702
   ]
703
  },
704
  {
705
   "cell_type": "code",
706
   "execution_count": null,
707
   "metadata": {},
708
   "outputs": [],
709
   "source": []
710
  }
711
 ],
712
 "metadata": {
713
  "kernelspec": {
714
   "display_name": "Python 3",
715
   "language": "python",
716
   "name": "python3"
717
  },
718
  "language_info": {
719
   "codemirror_mode": {
720
    "name": "ipython",
721
    "version": 3
722
   },
723
   "file_extension": ".py",
724
   "mimetype": "text/x-python",
725
   "name": "python",
726
   "nbconvert_exporter": "python",
727
   "pygments_lexer": "ipython3",
728
   "version": "3.6.6"
729
  }
730
 },
731
 "nbformat": 4,
732
 "nbformat_minor": 2
733
}