Switch to unified view

a b/Serialized/Post Full Head Models Train .ipynb
1
{
2
 "cells": [
3
  {
4
   "cell_type": "code",
5
   "execution_count": 22,
6
   "metadata": {},
7
   "outputs": [
8
    {
9
     "name": "stdout",
10
     "output_type": "stream",
11
     "text": [
12
      "The autoreload extension is already loaded. To reload it, use:\n",
13
      "  %reload_ext autoreload\n"
14
     ]
15
    }
16
   ],
17
   "source": [
18
    "from __future__ import absolute_import\n",
19
    "from __future__ import division\n",
20
    "from __future__ import print_function\n",
21
    "\n",
22
    "\n",
23
    "import numpy as np # linear algebra\n",
24
    "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n",
25
    "import os\n",
26
    "import datetime\n",
27
    "import seaborn as sns\n",
28
    "\n",
29
    "#import pydicom\n",
30
    "import time\n",
31
    "from functools import partial\n",
32
    "import gc\n",
33
    "import operator \n",
34
    "import matplotlib.pyplot as plt\n",
35
    "import torch\n",
36
    "import torch.nn as nn\n",
37
    "import torch.utils.data as D\n",
38
    "import torch.nn.functional as F\n",
39
    "from sklearn.model_selection import KFold\n",
40
    "from tqdm import tqdm, tqdm_notebook\n",
41
    "from IPython.core.interactiveshell import InteractiveShell\n",
42
    "InteractiveShell.ast_node_interactivity = \"all\"\n",
43
    "import warnings\n",
44
    "warnings.filterwarnings(action='once')\n",
45
    "import pickle\n",
46
    "%load_ext autoreload\n",
47
    "%autoreload 2\n",
48
    "%matplotlib inline\n",
49
    "from skimage.io import imread,imshow\n",
50
    "from helper import *\n",
51
    "import helper\n",
52
    "import torchvision.models as models\n",
53
    "from torch.optim import Adam\n",
54
    "from defenitions import *"
55
   ]
56
  },
57
  {
58
   "cell_type": "markdown",
59
   "metadata": {},
60
   "source": [
61
    "## Set parameters below"
62
   ]
63
  },
64
  {
65
   "cell_type": "code",
66
   "execution_count": 2,
67
   "metadata": {},
68
   "outputs": [],
69
   "source": [
70
    "# Choose which model parameter set to use (see defenitions.py) and which GPU to run on\n",
71
    "params=parameters['se_resnext101_32x4d_3'] # se_resnet101_5, se_resnext101_32x4d_3, se_resnext101_32x4d_5\n",
72
    "\n",
73
    "device=device_by_name(\"Tesla\") # RTX , cpu\n",
74
    "torch.cuda.set_device(device)\n",
75
    "sendmeemail=Email_Progress(my_gmail,my_pass,to_email,'{} results'.format(params['model_name']))"
76
   ]
77
  },
78
  {
79
   "cell_type": "code",
80
   "execution_count": 3,
81
   "metadata": {},
82
   "outputs": [
83
    {
84
     "data": {
85
      "text/plain": [
86
       "{'model_name': 'se_resnext101_32x4d',\n",
87
       " 'SEED': 8153,\n",
88
       " 'n_splits': 3,\n",
89
       " 'Pre_version': None,\n",
90
       " 'focal': False,\n",
91
       " 'version': 'classifier_splits',\n",
92
       " 'train_prediction': 'predictions_train_tta',\n",
93
       " 'train_features': 'features_train_tta',\n",
94
       " 'test_prediction': 'predictions_test_tta',\n",
95
       " 'test_features': 'features_test_tta',\n",
96
       " 'num_epochs': 5,\n",
97
       " 'num_pool': 8}"
98
      ]
99
     },
100
     "execution_count": 3,
101
     "metadata": {},
102
     "output_type": "execute_result"
103
    }
104
   ],
105
   "source": [
106
    "params"
107
   ]
108
  },
109
  {
110
   "cell_type": "code",
111
   "execution_count": 4,
112
   "metadata": {},
113
   "outputs": [],
114
   "source": [
115
    "SEED = params['SEED']\n",
116
    "n_splits=params['n_splits']"
117
   ]
118
  },
119
  {
120
   "cell_type": "code",
121
   "execution_count": 5,
122
   "metadata": {},
123
   "outputs": [
124
    {
125
     "data": {
126
      "text/plain": [
127
       "(674252, 15)"
128
      ]
129
     },
130
     "execution_count": 5,
131
     "metadata": {},
132
     "output_type": "execute_result"
133
    },
134
    {
135
     "data": {
136
      "text/plain": [
137
       "(674252, 15)"
138
      ]
139
     },
140
     "execution_count": 5,
141
     "metadata": {},
142
     "output_type": "execute_result"
143
    },
144
    {
145
     "data": {
146
      "text/html": [
147
       "<div>\n",
148
       "<style scoped>\n",
149
       "    .dataframe tbody tr th:only-of-type {\n",
150
       "        vertical-align: middle;\n",
151
       "    }\n",
152
       "\n",
153
       "    .dataframe tbody tr th {\n",
154
       "        vertical-align: top;\n",
155
       "    }\n",
156
       "\n",
157
       "    .dataframe thead th {\n",
158
       "        text-align: right;\n",
159
       "    }\n",
160
       "</style>\n",
161
       "<table border=\"1\" class=\"dataframe\">\n",
162
       "  <thead>\n",
163
       "    <tr style=\"text-align: right;\">\n",
164
       "      <th></th>\n",
165
       "      <th>PatientID</th>\n",
166
       "      <th>epidural</th>\n",
167
       "      <th>intraparenchymal</th>\n",
168
       "      <th>intraventricular</th>\n",
169
       "      <th>subarachnoid</th>\n",
170
       "      <th>subdural</th>\n",
171
       "      <th>any</th>\n",
172
       "      <th>PID</th>\n",
173
       "      <th>StudyI</th>\n",
174
       "      <th>SeriesI</th>\n",
175
       "      <th>WindowCenter</th>\n",
176
       "      <th>WindowWidth</th>\n",
177
       "      <th>ImagePositionZ</th>\n",
178
       "      <th>ImagePositionX</th>\n",
179
       "      <th>ImagePositionY</th>\n",
180
       "    </tr>\n",
181
       "  </thead>\n",
182
       "  <tbody>\n",
183
       "    <tr>\n",
184
       "      <th>0</th>\n",
185
       "      <td>63eb1e259</td>\n",
186
       "      <td>0</td>\n",
187
       "      <td>0</td>\n",
188
       "      <td>0</td>\n",
189
       "      <td>0</td>\n",
190
       "      <td>0</td>\n",
191
       "      <td>0</td>\n",
192
       "      <td>a449357f</td>\n",
193
       "      <td>62d125e5b2</td>\n",
194
       "      <td>0be5c0d1b3</td>\n",
195
       "      <td>['00036', '00036']</td>\n",
196
       "      <td>['00080', '00080']</td>\n",
197
       "      <td>180.199951</td>\n",
198
       "      <td>-125.0</td>\n",
199
       "      <td>-8.000000</td>\n",
200
       "    </tr>\n",
201
       "    <tr>\n",
202
       "      <th>1</th>\n",
203
       "      <td>2669954a7</td>\n",
204
       "      <td>0</td>\n",
205
       "      <td>0</td>\n",
206
       "      <td>0</td>\n",
207
       "      <td>0</td>\n",
208
       "      <td>0</td>\n",
209
       "      <td>0</td>\n",
210
       "      <td>363d5865</td>\n",
211
       "      <td>a20b80c7bf</td>\n",
212
       "      <td>3564d584db</td>\n",
213
       "      <td>['00047', '00047']</td>\n",
214
       "      <td>['00080', '00080']</td>\n",
215
       "      <td>922.530821</td>\n",
216
       "      <td>-156.0</td>\n",
217
       "      <td>45.572849</td>\n",
218
       "    </tr>\n",
219
       "    <tr>\n",
220
       "      <th>2</th>\n",
221
       "      <td>52c9913b1</td>\n",
222
       "      <td>0</td>\n",
223
       "      <td>0</td>\n",
224
       "      <td>0</td>\n",
225
       "      <td>0</td>\n",
226
       "      <td>0</td>\n",
227
       "      <td>0</td>\n",
228
       "      <td>9c2b4bd7</td>\n",
229
       "      <td>3e3634f8cf</td>\n",
230
       "      <td>973274ffc9</td>\n",
231
       "      <td>40</td>\n",
232
       "      <td>150</td>\n",
233
       "      <td>4.455000</td>\n",
234
       "      <td>-125.0</td>\n",
235
       "      <td>-115.063000</td>\n",
236
       "    </tr>\n",
237
       "    <tr>\n",
238
       "      <th>3</th>\n",
239
       "      <td>4e6ff6126</td>\n",
240
       "      <td>0</td>\n",
241
       "      <td>0</td>\n",
242
       "      <td>0</td>\n",
243
       "      <td>0</td>\n",
244
       "      <td>0</td>\n",
245
       "      <td>0</td>\n",
246
       "      <td>3ae81c2d</td>\n",
247
       "      <td>a1390c15c2</td>\n",
248
       "      <td>e5ccad8244</td>\n",
249
       "      <td>['00036', '00036']</td>\n",
250
       "      <td>['00080', '00080']</td>\n",
251
       "      <td>100.000000</td>\n",
252
       "      <td>-99.5</td>\n",
253
       "      <td>28.500000</td>\n",
254
       "    </tr>\n",
255
       "    <tr>\n",
256
       "      <th>4</th>\n",
257
       "      <td>7858edd88</td>\n",
258
       "      <td>0</td>\n",
259
       "      <td>0</td>\n",
260
       "      <td>0</td>\n",
261
       "      <td>0</td>\n",
262
       "      <td>0</td>\n",
263
       "      <td>0</td>\n",
264
       "      <td>c1867feb</td>\n",
265
       "      <td>c73e81ed3a</td>\n",
266
       "      <td>28e0531b3a</td>\n",
267
       "      <td>40</td>\n",
268
       "      <td>100</td>\n",
269
       "      <td>145.793000</td>\n",
270
       "      <td>-125.0</td>\n",
271
       "      <td>-132.190000</td>\n",
272
       "    </tr>\n",
273
       "  </tbody>\n",
274
       "</table>\n",
275
       "</div>"
276
      ],
277
      "text/plain": [
278
       "   PatientID  epidural  intraparenchymal  intraventricular  subarachnoid  \\\n",
279
       "0  63eb1e259         0                 0                 0             0   \n",
280
       "1  2669954a7         0                 0                 0             0   \n",
281
       "2  52c9913b1         0                 0                 0             0   \n",
282
       "3  4e6ff6126         0                 0                 0             0   \n",
283
       "4  7858edd88         0                 0                 0             0   \n",
284
       "\n",
285
       "   subdural  any       PID      StudyI     SeriesI        WindowCenter  \\\n",
286
       "0         0    0  a449357f  62d125e5b2  0be5c0d1b3  ['00036', '00036']   \n",
287
       "1         0    0  363d5865  a20b80c7bf  3564d584db  ['00047', '00047']   \n",
288
       "2         0    0  9c2b4bd7  3e3634f8cf  973274ffc9                  40   \n",
289
       "3         0    0  3ae81c2d  a1390c15c2  e5ccad8244  ['00036', '00036']   \n",
290
       "4         0    0  c1867feb  c73e81ed3a  28e0531b3a                  40   \n",
291
       "\n",
292
       "          WindowWidth  ImagePositionZ  ImagePositionX  ImagePositionY  \n",
293
       "0  ['00080', '00080']      180.199951          -125.0       -8.000000  \n",
294
       "1  ['00080', '00080']      922.530821          -156.0       45.572849  \n",
295
       "2                 150        4.455000          -125.0     -115.063000  \n",
296
       "3  ['00080', '00080']      100.000000           -99.5       28.500000  \n",
297
       "4                 100      145.793000          -125.0     -132.190000  "
298
      ]
299
     },
300
     "execution_count": 5,
301
     "metadata": {},
302
     "output_type": "execute_result"
303
    }
304
   ],
305
   "source": [
306
    "train_df = pd.read_csv(data_dir+'train.csv')\n",
307
    "train_df.shape\n",
308
    "train_df=train_df[~train_df.PatientID.isin(bad_images)].reset_index(drop=True)\n",
309
    "train_df=train_df.drop_duplicates().reset_index(drop=True)\n",
310
    "train_df.shape\n",
311
    "train_df.head()"
312
   ]
313
  },
314
  {
315
   "cell_type": "code",
316
   "execution_count": 6,
317
   "metadata": {},
318
   "outputs": [
319
    {
320
     "data": {
321
      "text/html": [
322
       "<div>\n",
323
       "<style scoped>\n",
324
       "    .dataframe tbody tr th:only-of-type {\n",
325
       "        vertical-align: middle;\n",
326
       "    }\n",
327
       "\n",
328
       "    .dataframe tbody tr th {\n",
329
       "        vertical-align: top;\n",
330
       "    }\n",
331
       "\n",
332
       "    .dataframe thead th {\n",
333
       "        text-align: right;\n",
334
       "    }\n",
335
       "</style>\n",
336
       "<table border=\"1\" class=\"dataframe\">\n",
337
       "  <thead>\n",
338
       "    <tr style=\"text-align: right;\">\n",
339
       "      <th></th>\n",
340
       "      <th>PatientID</th>\n",
341
       "      <th>epidural</th>\n",
342
       "      <th>intraparenchymal</th>\n",
343
       "      <th>intraventricular</th>\n",
344
       "      <th>subarachnoid</th>\n",
345
       "      <th>subdural</th>\n",
346
       "      <th>any</th>\n",
347
       "      <th>SeriesI</th>\n",
348
       "      <th>PID</th>\n",
349
       "      <th>StudyI</th>\n",
350
       "      <th>WindowCenter</th>\n",
351
       "      <th>WindowWidth</th>\n",
352
       "      <th>ImagePositionZ</th>\n",
353
       "      <th>ImagePositionX</th>\n",
354
       "      <th>ImagePositionY</th>\n",
355
       "    </tr>\n",
356
       "  </thead>\n",
357
       "  <tbody>\n",
358
       "    <tr>\n",
359
       "      <th>0</th>\n",
360
       "      <td>28fbab7eb</td>\n",
361
       "      <td>0.5</td>\n",
362
       "      <td>0.5</td>\n",
363
       "      <td>0.5</td>\n",
364
       "      <td>0.5</td>\n",
365
       "      <td>0.5</td>\n",
366
       "      <td>0.5</td>\n",
367
       "      <td>ebfd7e4506</td>\n",
368
       "      <td>cf1b6b11</td>\n",
369
       "      <td>93407cadbb</td>\n",
370
       "      <td>30</td>\n",
371
       "      <td>80</td>\n",
372
       "      <td>158.458000</td>\n",
373
       "      <td>-125.0</td>\n",
374
       "      <td>-135.598000</td>\n",
375
       "    </tr>\n",
376
       "    <tr>\n",
377
       "      <th>1</th>\n",
378
       "      <td>877923b8b</td>\n",
379
       "      <td>0.5</td>\n",
380
       "      <td>0.5</td>\n",
381
       "      <td>0.5</td>\n",
382
       "      <td>0.5</td>\n",
383
       "      <td>0.5</td>\n",
384
       "      <td>0.5</td>\n",
385
       "      <td>6d95084e15</td>\n",
386
       "      <td>ad8ea58f</td>\n",
387
       "      <td>a337baa067</td>\n",
388
       "      <td>30</td>\n",
389
       "      <td>80</td>\n",
390
       "      <td>138.729050</td>\n",
391
       "      <td>-125.0</td>\n",
392
       "      <td>-101.797981</td>\n",
393
       "    </tr>\n",
394
       "    <tr>\n",
395
       "      <th>2</th>\n",
396
       "      <td>a591477cb</td>\n",
397
       "      <td>0.5</td>\n",
398
       "      <td>0.5</td>\n",
399
       "      <td>0.5</td>\n",
400
       "      <td>0.5</td>\n",
401
       "      <td>0.5</td>\n",
402
       "      <td>0.5</td>\n",
403
       "      <td>8e06b2c9e0</td>\n",
404
       "      <td>ecfb278b</td>\n",
405
       "      <td>0cfe838d54</td>\n",
406
       "      <td>30</td>\n",
407
       "      <td>80</td>\n",
408
       "      <td>60.830002</td>\n",
409
       "      <td>-125.0</td>\n",
410
       "      <td>-133.300003</td>\n",
411
       "    </tr>\n",
412
       "    <tr>\n",
413
       "      <th>3</th>\n",
414
       "      <td>42217c898</td>\n",
415
       "      <td>0.5</td>\n",
416
       "      <td>0.5</td>\n",
417
       "      <td>0.5</td>\n",
418
       "      <td>0.5</td>\n",
419
       "      <td>0.5</td>\n",
420
       "      <td>0.5</td>\n",
421
       "      <td>e800f419cf</td>\n",
422
       "      <td>e96e31f4</td>\n",
423
       "      <td>c497ac5bad</td>\n",
424
       "      <td>30</td>\n",
425
       "      <td>80</td>\n",
426
       "      <td>55.388000</td>\n",
427
       "      <td>-125.0</td>\n",
428
       "      <td>-146.081000</td>\n",
429
       "    </tr>\n",
430
       "    <tr>\n",
431
       "      <th>4</th>\n",
432
       "      <td>a130c4d2f</td>\n",
433
       "      <td>0.5</td>\n",
434
       "      <td>0.5</td>\n",
435
       "      <td>0.5</td>\n",
436
       "      <td>0.5</td>\n",
437
       "      <td>0.5</td>\n",
438
       "      <td>0.5</td>\n",
439
       "      <td>faeb7454f3</td>\n",
440
       "      <td>69affa42</td>\n",
441
       "      <td>854e4fbc01</td>\n",
442
       "      <td>30</td>\n",
443
       "      <td>80</td>\n",
444
       "      <td>33.516888</td>\n",
445
       "      <td>-125.0</td>\n",
446
       "      <td>-118.689819</td>\n",
447
       "    </tr>\n",
448
       "  </tbody>\n",
449
       "</table>\n",
450
       "</div>"
451
      ],
452
      "text/plain": [
453
       "   PatientID  epidural  intraparenchymal  intraventricular  subarachnoid  \\\n",
454
       "0  28fbab7eb       0.5               0.5               0.5           0.5   \n",
455
       "1  877923b8b       0.5               0.5               0.5           0.5   \n",
456
       "2  a591477cb       0.5               0.5               0.5           0.5   \n",
457
       "3  42217c898       0.5               0.5               0.5           0.5   \n",
458
       "4  a130c4d2f       0.5               0.5               0.5           0.5   \n",
459
       "\n",
460
       "   subdural  any     SeriesI       PID      StudyI WindowCenter WindowWidth  \\\n",
461
       "0       0.5  0.5  ebfd7e4506  cf1b6b11  93407cadbb           30          80   \n",
462
       "1       0.5  0.5  6d95084e15  ad8ea58f  a337baa067           30          80   \n",
463
       "2       0.5  0.5  8e06b2c9e0  ecfb278b  0cfe838d54           30          80   \n",
464
       "3       0.5  0.5  e800f419cf  e96e31f4  c497ac5bad           30          80   \n",
465
       "4       0.5  0.5  faeb7454f3  69affa42  854e4fbc01           30          80   \n",
466
       "\n",
467
       "   ImagePositionZ  ImagePositionX  ImagePositionY  \n",
468
       "0      158.458000          -125.0     -135.598000  \n",
469
       "1      138.729050          -125.0     -101.797981  \n",
470
       "2       60.830002          -125.0     -133.300003  \n",
471
       "3       55.388000          -125.0     -146.081000  \n",
472
       "4       33.516888          -125.0     -118.689819  "
473
      ]
474
     },
475
     "execution_count": 6,
476
     "metadata": {},
477
     "output_type": "execute_result"
478
    }
479
   ],
480
   "source": [
481
    "test_df = pd.read_csv(data_dir+'test.csv')\n",
482
    "test_df.head()"
483
   ]
484
  },
485
  {
486
   "cell_type": "code",
487
   "execution_count": 7,
488
   "metadata": {},
489
   "outputs": [],
490
   "source": [
491
    "split_sid = train_df.PID.unique()\n",
492
    "splits=list(KFold(n_splits=n_splits,shuffle=True, random_state=SEED).split(split_sid))\n"
493
   ]
494
  },
495
  {
496
   "cell_type": "code",
497
   "execution_count": 8,
498
   "metadata": {},
499
   "outputs": [],
500
   "source": [
501
    "def my_loss(y_pred,y_true,weights):\n",
502
    "    \"\"\"Masked, weighted BCE-with-logits; entries whose target is negative contribute nothing.\"\"\"\n",
503
    "    valid=(y_true>=0).to(torch.float)\n",
504
    "    return (F.binary_cross_entropy_with_logits(y_pred,y_true,reduction='none')*valid*weights.expand_as(y_true)).mean()/(valid.mean()+1e-7)"
505
   ]
506
  },
507
  {
508
   "cell_type": "code",
509
   "execution_count": 9,
510
   "metadata": {},
511
   "outputs": [],
512
   "source": [
513
    "class Metric():\n",
514
    "    \"\"\"Running masked, weighted BCE-with-logits metric: a smoothed value (`lossf`) plus running totals.\"\"\"\n",
515
    "    def __init__(self,weights,k=0.03):  # weights: tensor expanded against y_true; k: smoothing factor of `lossf`\n",
516
    "        self.weights,self.k=weights,k\n",
517
    "        self.zero()\n",
518
    "    # Reset the smoothed loss and the running sum/count.\n",
519
    "    def zero(self):\n",
520
    "        self.loss_sum=0.\n",
521
    "        self.loss_count=0.\n",
522
    "        self.lossf=0.\n",
523
    "    # Fold one batch in and return {prefix+'mloss': smoothed loss}; the batch loss is detached so the accumulators never retain autograd graphs.\n",
524
    "    def calc(self,y_pred,y_true,prefix=\"\"):\n",
525
    "        window=(y_true>=0).to(torch.float)  # mask: 1.0 where the target is >= 0, else 0.0\n",
526
    "        loss = ((F.binary_cross_entropy_with_logits(y_pred,y_true,reduction='none')*window*self.weights.to(y_true.device).expand_as(y_true)).mean()/(window.mean()+1e-5)).detach()\n",
527
    "        self.lossf=self.lossf*(1-self.k)+loss*self.k\n",
528
    "        self.loss_sum=self.loss_sum+loss*window.sum()\n",
529
    "        self.loss_count=self.loss_count+window.sum()\n",
530
    "        return({prefix+'mloss':self.lossf})\n",
531
    "    # Return {prefix+'mloss_tot': loss_sum/loss_count}; raises ZeroDivisionError if no calc() ran since the last zero().\n",
532
    "    def calc_sums(self,prefix=\"\"):\n",
533
    "        return({prefix+'mloss_tot':self.loss_sum/self.loss_count})\n",
534
    "\n"
535
   ]
536
  },
537
  {
538
   "cell_type": "code",
539
   "execution_count": 10,
540
   "metadata": {},
541
   "outputs": [],
542
   "source": [
543
    "#features=(features-features.mean())/features.std()"
544
   ]
545
  },
546
  {
547
   "cell_type": "code",
548
   "execution_count": null,
549
   "metadata": {
550
    "scrolled": true
551
   },
552
   "outputs": [],
553
   "source": [
554
    "%matplotlib nbagg\n",
555
    "for num_split in range(params['n_splits']):\n",
556
    "    multi=3\n",
557
    "    model_name,version = params['model_name'] , params['version']\n",
558
    "    print (model_name,version,num_split)\n",
559
    "    # Read this split's pickled train features; the with-block closes the file even if unpickling raises.\n",
560
    "    with open(outputs_dir+outputs_format.format(model_name,version,params['train_features'],num_split),'rb') as pickle_file:\n",
561
    "        features=pickle.load(pickle_file)\n",
562
    "    features.shape\n",
563
    "\n",
564
    "    features=features.reshape(features.shape[0]//4,4,-1)\n",
565
    "    features.shape\n",
566
    "    split_train = train_df[train_df.PID.isin(set(split_sid[splits[num_split][0]]))].SeriesI.unique()\n",
567
    "    split_validate =  train_df[train_df.PID.isin(set(split_sid[splits[num_split][1]]))].SeriesI.unique()\n",
568
    "\n",
569
    "    np.random.seed(SEED+num_split)\n",
570
    "    torch.manual_seed(SEED+num_split)\n",
571
    "    torch.cuda.manual_seed(SEED+num_split)\n",
572
    "    torch.backends.cudnn.deterministic = True\n",
573
    "    batch_size=16\n",
574
    "    num_workers=18\n",
575
    "    num_epochs=24\n",
576
    "    klr=1\n",
577
    "    weights = torch.tensor([1.,1.,1.,1.,1.,2.],device=device)\n",
578
    "    train_dataset=FullHeadDataset(train_df,\n",
579
    "                                  split_train,\n",
580
    "                                  features,\n",
581
    "                                  'SeriesI',\n",
582
    "                                  'ImagePositionZ',\n",
583
    "                                  hemorrhage_types,\n",
584
    "                                  multi=multi)                \n",
585
    "    validate_dataset=FullHeadDataset(train_df,\n",
586
    "                                     split_validate,\n",
587
    "                                     torch.cat([features[:,i,:] for i in range(4)],-1),\n",
588
    "                                     'SeriesI',\n",
589
    "                                     'ImagePositionZ',\n",
590
    "                                     hemorrhage_types)                \n",
591
    "\n",
592
    "    model=ResModelPool(features.shape[-1])\n",
593
    "    version=version+'_fullhead_resmodel_pool2_{}'.format(multi)\n",
594
    "    _=model.to(device)\n",
595
    "    #mixup=Mixup(device=device)\n",
596
    "    loss_func=my_loss\n",
597
    "    #fig,ax = plt.subplots(figsize=(10,7))\n",
598
    "    #gr=loss_graph(fig,ax,num_epochs,len(train_dataset)//batch_size+1,limits=[0.02,0.06])\n",
599
    "    num_train_optimization_steps = num_epochs*(len(train_dataset)//batch_size+int(len(train_dataset)%batch_size>0))\n",
600
    "    sched=WarmupExpCosineWithWarmupRestartsSchedule( t_total=num_train_optimization_steps, cycles=2,tau=1)\n",
601
    "    optimizer = BertAdam(model.parameters(),lr=klr*1e-3,schedule=sched)\n",
602
    "    history,best_model= model_train(model,\n",
603
    "                                    optimizer,\n",
604
    "                                    train_dataset,\n",
605
    "                                    batch_size,\n",
606
    "                                    num_epochs,\n",
607
    "                                    loss_func,\n",
608
    "                                    weights=weights,\n",
609
    "                                    do_apex=False,\n",
610
    "                                    validate_dataset=validate_dataset,\n",
611
    "                                    param_schedualer=None,\n",
612
    "                                    weights_data=None,\n",
613
    "                                    metric=Metric(torch.tensor([1.,1.,1.,1.,1.,2.])),\n",
614
    "                                    return_model=True,\n",
615
    "                                    best_average=3,\n",
616
    "                                    num_workers=num_workers,\n",
617
    "                                    sampler=None,\n",
618
    "                                    graph=None)\n",
619
    "    torch.save(best_model.state_dict(), models_dir+models_format.format(model_name,version,num_split))"
620
   ]
621
  },
622
  {
623
   "cell_type": "markdown",
624
   "metadata": {},
625
   "source": [
626
    "## Create submission file (for reference)"
627
   ]
628
  },
629
  {
630
   "cell_type": "code",
631
   "execution_count": 22,
632
   "metadata": {},
633
   "outputs": [],
634
   "source": [
635
    "def align(arr,index1,index2):  # reorder arr, given in index2 order, into index1 order (assumes both hold the same unique keys)\n",
636
    "    return arr[np.argsort(index2)[np.argsort(index1).argsort()]]"
637
   ]
638
  },
639
  {
640
   "cell_type": "code",
641
   "execution_count": null,
642
   "metadata": {},
643
   "outputs": [],
644
   "source": [
645
    "pred_list = []\n",
646
    "for num_split in range(params['n_splits']):\n",
647
    "    model_name,version = params['model_name'] , params['version']\n",
648
    "    # Read this split's pickled test features; the with-block closes the file even if unpickling raises.\n",
649
    "    with open(outputs_dir+outputs_format.format(model_name,version,params['test_features'],num_split),'rb') as pickle_file:\n",
650
    "        features=pickle.load(pickle_file)\n",
651
    "    features=features.reshape(features.shape[0]//8,8,-1)\n",
652
    "\n",
653
    "    model=ResModelPool(features.shape[-1])\n",
654
    "    version=version+'_fullhead_resmodel_pool2_3'\n",
655
    "\n",
656
    "    model.load_state_dict(torch.load(models_dir+models_format.format(model_name,version,num_split),map_location=torch.device(device)))\n",
657
    "    test_dataset=FullHeadDataset(test_df,  # bound to test_dataset only, so train_dataset keeps referring to training data\n",
658
    "                                  test_df.SeriesI.unique(),\n",
659
    "                                  features,\n",
660
    "                                  'SeriesI',\n",
661
    "                                  'ImagePositionZ',multi=4)\n",
662
    "    for i in tqdm_notebook(range(8),leave=False):\n",
663
    "        pred_list.append(torch.sigmoid(model_run(model,test_dataset,do_apex=False,batch_size=128))[...,None])\n"
664
   ]
665
  },
666
  {
667
   "cell_type": "code",
668
   "execution_count": 18,
669
   "metadata": {},
670
   "outputs": [
671
    {
672
     "data": {
673
      "text/plain": [
674
       "24"
675
      ]
676
     },
677
     "execution_count": 18,
678
     "metadata": {},
679
     "output_type": "execute_result"
680
    },
681
    {
682
     "data": {
683
      "text/plain": [
684
       "torch.Size([2214, 60, 6, 1])"
685
      ]
686
     },
687
     "execution_count": 18,
688
     "metadata": {},
689
     "output_type": "execute_result"
690
    }
691
   ],
692
   "source": [
693
    "len(pred_list)\n",
694
    "pred_list[0].shape"
695
   ]
696
  },
697
  {
698
   "cell_type": "code",
699
   "execution_count": 19,
700
   "metadata": {},
701
   "outputs": [],
702
   "source": [
703
    "pred=torch.cat(pred_list,-1).mean(-1)"
704
   ]
705
  },
706
  {
707
   "cell_type": "code",
708
   "execution_count": 20,
709
   "metadata": {},
710
   "outputs": [
711
    {
712
     "data": {
713
      "application/vnd.jupyter.widget-view+json": {
714
       "model_id": "367fa4dcd20f4104b6be2b333db348ba",
715
       "version_major": 2,
716
       "version_minor": 0
717
      },
718
      "text/plain": [
719
       "HBox(children=(IntProgress(value=0, max=2214), HTML(value='')))"
720
      ]
721
     },
722
     "metadata": {},
723
     "output_type": "display_data"
724
    },
725
    {
726
     "data": {
727
      "text/plain": [
728
       "(78545,)"
729
      ]
730
     },
731
     "execution_count": 20,
732
     "metadata": {},
733
     "output_type": "execute_result"
734
    },
735
    {
736
     "data": {
737
      "text/plain": [
738
       "(78545, 6)"
739
      ]
740
     },
741
     "execution_count": 20,
742
     "metadata": {},
743
     "output_type": "execute_result"
744
    }
745
   ],
746
   "source": [
747
    "images_id_list=[]\n",
748
    "dummeys=[]\n",
749
    "image_arr=test_df.PatientID.values\n",
750
    "ref_arr=test_df.SeriesI.values\n",
751
    "order_arr=test_df.ImagePositionZ.values\n",
752
    "for s in tqdm_notebook(test_df.SeriesI.unique()):\n",
753
    "    dumm=np.zeros(60)\n",
754
    "    head_idx = np.where(ref_arr==s)[0]\n",
755
    "    sorted_head_idx=head_idx[np.argsort(order_arr[head_idx])]\n",
756
    "    images_id_list.append(image_arr[sorted_head_idx])\n",
757
    "    dumm[0:head_idx.shape[0]]=1\n",
758
    "    dummeys.append(dumm)\n",
759
    "image_ids=np.concatenate(images_id_list)\n",
760
    "preds=pred.reshape(pred.shape[0]*pred.shape[1],6).numpy()[np.concatenate(dummeys)==1]\n",
761
    "\n",
762
    "image_ids.shape\n",
763
    "\n",
764
    "preds.shape"
765
   ]
766
  },
767
  {
768
   "cell_type": "code",
769
   "execution_count": 23,
770
   "metadata": {},
771
   "outputs": [
772
    {
773
     "data": {
774
      "text/html": [
775
       "<div>\n",
776
       "<style scoped>\n",
777
       "    .dataframe tbody tr th:only-of-type {\n",
778
       "        vertical-align: middle;\n",
779
       "    }\n",
780
       "\n",
781
       "    .dataframe tbody tr th {\n",
782
       "        vertical-align: top;\n",
783
       "    }\n",
784
       "\n",
785
       "    .dataframe thead th {\n",
786
       "        text-align: right;\n",
787
       "    }\n",
788
       "</style>\n",
789
       "<table border=\"1\" class=\"dataframe\">\n",
790
       "  <thead>\n",
791
       "    <tr style=\"text-align: right;\">\n",
792
       "      <th></th>\n",
793
       "      <th>ID</th>\n",
794
       "      <th>Label</th>\n",
795
       "    </tr>\n",
796
       "  </thead>\n",
797
       "  <tbody>\n",
798
       "    <tr>\n",
799
       "      <th>0</th>\n",
800
       "      <td>ID_000012eaf_any</td>\n",
801
       "      <td>0.000721</td>\n",
802
       "    </tr>\n",
803
       "    <tr>\n",
804
       "      <th>1</th>\n",
805
       "      <td>ID_000012eaf_epidural</td>\n",
806
       "      <td>0.000080</td>\n",
807
       "    </tr>\n",
808
       "    <tr>\n",
809
       "      <th>2</th>\n",
810
       "      <td>ID_000012eaf_intraparenchymal</td>\n",
811
       "      <td>0.000067</td>\n",
812
       "    </tr>\n",
813
       "    <tr>\n",
814
       "      <th>3</th>\n",
815
       "      <td>ID_000012eaf_intraventricular</td>\n",
816
       "      <td>0.000017</td>\n",
817
       "    </tr>\n",
818
       "    <tr>\n",
819
       "      <th>4</th>\n",
820
       "      <td>ID_000012eaf_subarachnoid</td>\n",
821
       "      <td>0.000069</td>\n",
822
       "    </tr>\n",
823
       "    <tr>\n",
824
       "      <th>5</th>\n",
825
       "      <td>ID_000012eaf_subdural</td>\n",
826
       "      <td>0.000702</td>\n",
827
       "    </tr>\n",
828
       "    <tr>\n",
829
       "      <th>6</th>\n",
830
       "      <td>ID_0000ca2f6_any</td>\n",
831
       "      <td>0.001115</td>\n",
832
       "    </tr>\n",
833
       "    <tr>\n",
834
       "      <th>7</th>\n",
835
       "      <td>ID_0000ca2f6_epidural</td>\n",
836
       "      <td>0.000038</td>\n",
837
       "    </tr>\n",
838
       "    <tr>\n",
839
       "      <th>8</th>\n",
840
       "      <td>ID_0000ca2f6_intraparenchymal</td>\n",
841
       "      <td>0.000144</td>\n",
842
       "    </tr>\n",
843
       "    <tr>\n",
844
       "      <th>9</th>\n",
845
       "      <td>ID_0000ca2f6_intraventricular</td>\n",
846
       "      <td>0.000022</td>\n",
847
       "    </tr>\n",
848
       "    <tr>\n",
849
       "      <th>10</th>\n",
850
       "      <td>ID_0000ca2f6_subarachnoid</td>\n",
851
       "      <td>0.000203</td>\n",
852
       "    </tr>\n",
853
       "    <tr>\n",
854
       "      <th>11</th>\n",
855
       "      <td>ID_0000ca2f6_subdural</td>\n",
856
       "      <td>0.000839</td>\n",
857
       "    </tr>\n",
858
       "  </tbody>\n",
859
       "</table>\n",
860
       "</div>"
861
      ],
862
      "text/plain": [
863
       "                               ID     Label\n",
864
       "0                ID_000012eaf_any  0.000721\n",
865
       "1           ID_000012eaf_epidural  0.000080\n",
866
       "2   ID_000012eaf_intraparenchymal  0.000067\n",
867
       "3   ID_000012eaf_intraventricular  0.000017\n",
868
       "4       ID_000012eaf_subarachnoid  0.000069\n",
869
       "5           ID_000012eaf_subdural  0.000702\n",
870
       "6                ID_0000ca2f6_any  0.001115\n",
871
       "7           ID_0000ca2f6_epidural  0.000038\n",
872
       "8   ID_0000ca2f6_intraparenchymal  0.000144\n",
873
       "9   ID_0000ca2f6_intraventricular  0.000022\n",
874
       "10      ID_0000ca2f6_subarachnoid  0.000203\n",
875
       "11          ID_0000ca2f6_subdural  0.000839"
876
      ]
877
     },
878
     "execution_count": 23,
879
     "metadata": {},
880
     "output_type": "execute_result"
881
    },
882
    {
883
     "data": {
884
      "text/plain": [
885
       "(471270, 2)"
886
      ]
887
     },
888
     "execution_count": 23,
889
     "metadata": {},
890
     "output_type": "execute_result"
891
    }
892
   ],
893
   "source": [
894
    "submission_df=get_submission_ids(image_ids,torch.tensor(preds),do_sigmoid=False)\n",
895
    "submission_df.head(12)\n",
896
    "submission_df.shape\n",
897
    "sub_num=999\n",
898
    "submission_df.to_csv('/media/hd/notebooks/data/RSNA/submissions/submission{}.csv'.format(sub_num),\n",
899
    "                                                                  index=False, columns=['ID','Label'])\n"
900
   ]
901
  },
902
  {
903
   "cell_type": "code",
904
   "execution_count": null,
905
   "metadata": {},
906
   "outputs": [],
907
   "source": []
908
  }
909
 ],
910
 "metadata": {
911
  "kernelspec": {
912
   "display_name": "Python 3",
913
   "language": "python",
914
   "name": "python3"
915
  },
916
  "language_info": {
917
   "codemirror_mode": {
918
    "name": "ipython",
919
    "version": 3
920
   },
921
   "file_extension": ".py",
922
   "mimetype": "text/x-python",
923
   "name": "python",
924
   "nbconvert_exporter": "python",
925
   "pygments_lexer": "ipython3",
926
   "version": "3.6.6"
927
  }
928
 },
929
 "nbformat": 4,
930
 "nbformat_minor": 2
931
}