a b/Serialized/train_base_models.ipynb
1
{
2
 "cells": [
3
  {
4
   "cell_type": "code",
5
   "execution_count": 1,
6
   "metadata": {},
7
   "outputs": [
8
    {
9
     "name": "stderr",
10
     "output_type": "stream",
11
     "text": [
12
      "/home/reina/anaconda3/envs/RSNA/lib/python3.6/importlib/_bootstrap.py:219: ImportWarning: can't resolve package from __spec__ or __package__, falling back on __name__ and __path__\n",
13
      "  return f(*args, **kwds)\n",
14
      "/home/reina/anaconda3/envs/RSNA/lib/python3.6/importlib/_bootstrap.py:219: ImportWarning: can't resolve package from __spec__ or __package__, falling back on __name__ and __path__\n",
15
      "  return f(*args, **kwds)\n"
16
     ]
17
    }
18
   ],
19
   "source": [
20
    "from __future__ import absolute_import\n",
21
    "from __future__ import division\n",
22
    "from __future__ import print_function\n",
23
    "\n",
24
    "\n",
25
    "import numpy as np # linear algebra\n",
26
    "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n",
27
    "import os\n",
28
    "import datetime\n",
29
    "import seaborn as sns\n",
30
    "import pydicom\n",
31
    "import time\n",
32
    "import gc\n",
33
    "import operator \n",
34
    "from apex import amp \n",
35
    "import matplotlib.pyplot as plt\n",
36
    "import torch\n",
37
    "import torch.nn as nn\n",
38
    "import torch.utils.data as D\n",
39
    "import torch.nn.functional as F\n",
40
    "from sklearn.model_selection import KFold\n",
41
    "from tqdm import tqdm, tqdm_notebook\n",
42
    "from IPython.core.interactiveshell import InteractiveShell\n",
43
    "InteractiveShell.ast_node_interactivity = \"all\"\n",
44
    "import warnings\n",
45
    "warnings.filterwarnings(action='once')\n",
46
    "import pickle\n",
47
    "%load_ext autoreload\n",
48
    "%autoreload 2\n",
49
    "%matplotlib inline\n",
50
    "from skimage.io import imread,imshow\n",
51
    "from helper import *\n",
52
    "from apex import amp\n",
53
    "import helper\n",
54
    "import torchvision.models as models\n",
55
    "import pretrainedmodels\n",
56
    "from torch.optim import Adam\n",
57
    "from functools import partial\n",
58
    "from defenitions import *"
59
   ]
60
  },
61
  {
62
   "cell_type": "markdown",
63
   "metadata": {},
64
   "source": [
65
    "## Set parameters below"
66
   ]
67
  },
68
  {
69
   "cell_type": "code",
70
   "execution_count": 11,
71
   "metadata": {},
72
   "outputs": [],
73
   "source": [
74
    "# Experiment configuration: choose one key of the `parameters` dict and the device to run on.\n",
    "# Keys mentioned here: se_resnet101_5, se_resnext101_32x4d_3, se_resnext101_32x4d_5\n",
    "# NOTE(review): `parameters`, `device_by_name`, `Email_Progress` and the e-mail settings are not\n",
    "# defined in this notebook; presumably they come from the star imports (helper / defenitions) -- confirm.\n",
    "params = parameters['se_resnet101_5']\n",
    "\n",
    "# Device is looked up by name; the original note lists \"RTX\" and \"cpu\" as alternative values.\n",
    "device = device_by_name(\"Tesla\")\n",
    "torch.cuda.set_device(device)\n",
    "\n",
    "# Handed to model_train as call_progress further down; the subject carries the model name.\n",
    "mail_subject = '{} results'.format(params['model_name'])\n",
    "sendmeemail = Email_Progress(my_gmail, my_pass, to_email, mail_subject)"
80
   ]
81
  },
82
  {
83
   "cell_type": "code",
84
   "execution_count": 12,
85
   "metadata": {},
86
   "outputs": [
87
    {
88
     "data": {
89
      "text/plain": [
90
       "{'model_name': 'se_resnet101',\n",
91
       " 'SEED': 432,\n",
92
       " 'n_splits': 5,\n",
93
       " 'Pre_version': None,\n",
94
       " 'focal': False,\n",
95
       " 'version': 'new_splits',\n",
96
       " 'train_prediction': 'predictions_train_tta',\n",
97
       " 'train_features': 'features_train_tta',\n",
98
       " 'test_prediction': 'predictions_test',\n",
99
       " 'test_features': 'features_test',\n",
100
       " 'num_epochs': 5,\n",
101
       " 'num_pool': 8}"
102
      ]
103
     },
104
     "execution_count": 12,
105
     "metadata": {},
106
     "output_type": "execute_result"
107
    }
108
   ],
109
   "source": [
110
    "params"
111
   ]
112
  },
113
  {
114
   "cell_type": "code",
115
   "execution_count": 13,
116
   "metadata": {},
117
   "outputs": [],
118
   "source": [
119
    "# Seed and fold count come from the selected configuration.\n",
    "SEED, n_splits = params['SEED'], params['n_splits']"
121
   ]
122
  },
123
  {
124
   "cell_type": "code",
125
   "execution_count": 14,
126
   "metadata": {},
127
   "outputs": [
128
    {
129
     "data": {
130
      "text/plain": [
131
       "(674252, 15)"
132
      ]
133
     },
134
     "execution_count": 14,
135
     "metadata": {},
136
     "output_type": "execute_result"
137
    },
138
    {
139
     "data": {
140
      "text/plain": [
141
       "(674252, 15)"
142
      ]
143
     },
144
     "execution_count": 14,
145
     "metadata": {},
146
     "output_type": "execute_result"
147
    },
148
    {
149
     "data": {
150
      "text/html": [
151
       "<div>\n",
152
       "<style scoped>\n",
153
       "    .dataframe tbody tr th:only-of-type {\n",
154
       "        vertical-align: middle;\n",
155
       "    }\n",
156
       "\n",
157
       "    .dataframe tbody tr th {\n",
158
       "        vertical-align: top;\n",
159
       "    }\n",
160
       "\n",
161
       "    .dataframe thead th {\n",
162
       "        text-align: right;\n",
163
       "    }\n",
164
       "</style>\n",
165
       "<table border=\"1\" class=\"dataframe\">\n",
166
       "  <thead>\n",
167
       "    <tr style=\"text-align: right;\">\n",
168
       "      <th></th>\n",
169
       "      <th>PatientID</th>\n",
170
       "      <th>epidural</th>\n",
171
       "      <th>intraparenchymal</th>\n",
172
       "      <th>intraventricular</th>\n",
173
       "      <th>subarachnoid</th>\n",
174
       "      <th>subdural</th>\n",
175
       "      <th>any</th>\n",
176
       "      <th>PID</th>\n",
177
       "      <th>StudyI</th>\n",
178
       "      <th>SeriesI</th>\n",
179
       "      <th>WindowCenter</th>\n",
180
       "      <th>WindowWidth</th>\n",
181
       "      <th>ImagePositionZ</th>\n",
182
       "      <th>ImagePositionX</th>\n",
183
       "      <th>ImagePositionY</th>\n",
184
       "    </tr>\n",
185
       "  </thead>\n",
186
       "  <tbody>\n",
187
       "    <tr>\n",
188
       "      <th>0</th>\n",
189
       "      <td>63eb1e259</td>\n",
190
       "      <td>0</td>\n",
191
       "      <td>0</td>\n",
192
       "      <td>0</td>\n",
193
       "      <td>0</td>\n",
194
       "      <td>0</td>\n",
195
       "      <td>0</td>\n",
196
       "      <td>a449357f</td>\n",
197
       "      <td>62d125e5b2</td>\n",
198
       "      <td>0be5c0d1b3</td>\n",
199
       "      <td>['00036', '00036']</td>\n",
200
       "      <td>['00080', '00080']</td>\n",
201
       "      <td>180.199951</td>\n",
202
       "      <td>-125.0</td>\n",
203
       "      <td>-8.000000</td>\n",
204
       "    </tr>\n",
205
       "    <tr>\n",
206
       "      <th>1</th>\n",
207
       "      <td>2669954a7</td>\n",
208
       "      <td>0</td>\n",
209
       "      <td>0</td>\n",
210
       "      <td>0</td>\n",
211
       "      <td>0</td>\n",
212
       "      <td>0</td>\n",
213
       "      <td>0</td>\n",
214
       "      <td>363d5865</td>\n",
215
       "      <td>a20b80c7bf</td>\n",
216
       "      <td>3564d584db</td>\n",
217
       "      <td>['00047', '00047']</td>\n",
218
       "      <td>['00080', '00080']</td>\n",
219
       "      <td>922.530821</td>\n",
220
       "      <td>-156.0</td>\n",
221
       "      <td>45.572849</td>\n",
222
       "    </tr>\n",
223
       "    <tr>\n",
224
       "      <th>2</th>\n",
225
       "      <td>52c9913b1</td>\n",
226
       "      <td>0</td>\n",
227
       "      <td>0</td>\n",
228
       "      <td>0</td>\n",
229
       "      <td>0</td>\n",
230
       "      <td>0</td>\n",
231
       "      <td>0</td>\n",
232
       "      <td>9c2b4bd7</td>\n",
233
       "      <td>3e3634f8cf</td>\n",
234
       "      <td>973274ffc9</td>\n",
235
       "      <td>40</td>\n",
236
       "      <td>150</td>\n",
237
       "      <td>4.455000</td>\n",
238
       "      <td>-125.0</td>\n",
239
       "      <td>-115.063000</td>\n",
240
       "    </tr>\n",
241
       "    <tr>\n",
242
       "      <th>3</th>\n",
243
       "      <td>4e6ff6126</td>\n",
244
       "      <td>0</td>\n",
245
       "      <td>0</td>\n",
246
       "      <td>0</td>\n",
247
       "      <td>0</td>\n",
248
       "      <td>0</td>\n",
249
       "      <td>0</td>\n",
250
       "      <td>3ae81c2d</td>\n",
251
       "      <td>a1390c15c2</td>\n",
252
       "      <td>e5ccad8244</td>\n",
253
       "      <td>['00036', '00036']</td>\n",
254
       "      <td>['00080', '00080']</td>\n",
255
       "      <td>100.000000</td>\n",
256
       "      <td>-99.5</td>\n",
257
       "      <td>28.500000</td>\n",
258
       "    </tr>\n",
259
       "    <tr>\n",
260
       "      <th>4</th>\n",
261
       "      <td>7858edd88</td>\n",
262
       "      <td>0</td>\n",
263
       "      <td>0</td>\n",
264
       "      <td>0</td>\n",
265
       "      <td>0</td>\n",
266
       "      <td>0</td>\n",
267
       "      <td>0</td>\n",
268
       "      <td>c1867feb</td>\n",
269
       "      <td>c73e81ed3a</td>\n",
270
       "      <td>28e0531b3a</td>\n",
271
       "      <td>40</td>\n",
272
       "      <td>100</td>\n",
273
       "      <td>145.793000</td>\n",
274
       "      <td>-125.0</td>\n",
275
       "      <td>-132.190000</td>\n",
276
       "    </tr>\n",
277
       "  </tbody>\n",
278
       "</table>\n",
279
       "</div>"
280
      ],
281
      "text/plain": [
282
       "   PatientID  epidural  intraparenchymal  intraventricular  subarachnoid  \\\n",
283
       "0  63eb1e259         0                 0                 0             0   \n",
284
       "1  2669954a7         0                 0                 0             0   \n",
285
       "2  52c9913b1         0                 0                 0             0   \n",
286
       "3  4e6ff6126         0                 0                 0             0   \n",
287
       "4  7858edd88         0                 0                 0             0   \n",
288
       "\n",
289
       "   subdural  any       PID      StudyI     SeriesI        WindowCenter  \\\n",
290
       "0         0    0  a449357f  62d125e5b2  0be5c0d1b3  ['00036', '00036']   \n",
291
       "1         0    0  363d5865  a20b80c7bf  3564d584db  ['00047', '00047']   \n",
292
       "2         0    0  9c2b4bd7  3e3634f8cf  973274ffc9                  40   \n",
293
       "3         0    0  3ae81c2d  a1390c15c2  e5ccad8244  ['00036', '00036']   \n",
294
       "4         0    0  c1867feb  c73e81ed3a  28e0531b3a                  40   \n",
295
       "\n",
296
       "          WindowWidth  ImagePositionZ  ImagePositionX  ImagePositionY  \n",
297
       "0  ['00080', '00080']      180.199951          -125.0       -8.000000  \n",
298
       "1  ['00080', '00080']      922.530821          -156.0       45.572849  \n",
299
       "2                 150        4.455000          -125.0     -115.063000  \n",
300
       "3  ['00080', '00080']      100.000000           -99.5       28.500000  \n",
301
       "4                 100      145.793000          -125.0     -132.190000  "
302
      ]
303
     },
304
     "execution_count": 14,
305
     "metadata": {},
306
     "output_type": "execute_result"
307
    }
308
   ],
309
   "source": [
310
    "# Load the training table, then drop rows whose PatientID is listed in bad_images and exact duplicate rows.\n",
    "train_df = pd.read_csv(data_dir+'train.csv')\n",
    "train_df.shape\n",
    "keep_rows = ~train_df.PatientID.isin(bad_images)\n",
    "train_df = (\n",
    "    train_df[keep_rows]\n",
    "    .reset_index(drop=True)\n",
    "    .drop_duplicates()\n",
    "    .reset_index(drop=True)\n",
    ")\n",
    "train_df.shape\n",
    "train_df.head()"
316
   ]
317
  },
318
  {
319
   "cell_type": "code",
320
   "execution_count": 15,
321
   "metadata": {},
322
   "outputs": [
323
    {
324
     "data": {
325
      "text/html": [
326
       "<div>\n",
327
       "<style scoped>\n",
328
       "    .dataframe tbody tr th:only-of-type {\n",
329
       "        vertical-align: middle;\n",
330
       "    }\n",
331
       "\n",
332
       "    .dataframe tbody tr th {\n",
333
       "        vertical-align: top;\n",
334
       "    }\n",
335
       "\n",
336
       "    .dataframe thead th {\n",
337
       "        text-align: right;\n",
338
       "    }\n",
339
       "</style>\n",
340
       "<table border=\"1\" class=\"dataframe\">\n",
341
       "  <thead>\n",
342
       "    <tr style=\"text-align: right;\">\n",
343
       "      <th></th>\n",
344
       "      <th>PatientID</th>\n",
345
       "      <th>epidural</th>\n",
346
       "      <th>intraparenchymal</th>\n",
347
       "      <th>intraventricular</th>\n",
348
       "      <th>subarachnoid</th>\n",
349
       "      <th>subdural</th>\n",
350
       "      <th>any</th>\n",
351
       "      <th>SeriesI</th>\n",
352
       "      <th>PID</th>\n",
353
       "      <th>StudyI</th>\n",
354
       "      <th>WindowCenter</th>\n",
355
       "      <th>WindowWidth</th>\n",
356
       "      <th>ImagePositionZ</th>\n",
357
       "      <th>ImagePositionX</th>\n",
358
       "      <th>ImagePositionY</th>\n",
359
       "    </tr>\n",
360
       "  </thead>\n",
361
       "  <tbody>\n",
362
       "    <tr>\n",
363
       "      <th>0</th>\n",
364
       "      <td>28fbab7eb</td>\n",
365
       "      <td>0.5</td>\n",
366
       "      <td>0.5</td>\n",
367
       "      <td>0.5</td>\n",
368
       "      <td>0.5</td>\n",
369
       "      <td>0.5</td>\n",
370
       "      <td>0.5</td>\n",
371
       "      <td>ebfd7e4506</td>\n",
372
       "      <td>cf1b6b11</td>\n",
373
       "      <td>93407cadbb</td>\n",
374
       "      <td>30</td>\n",
375
       "      <td>80</td>\n",
376
       "      <td>158.458000</td>\n",
377
       "      <td>-125.0</td>\n",
378
       "      <td>-135.598000</td>\n",
379
       "    </tr>\n",
380
       "    <tr>\n",
381
       "      <th>1</th>\n",
382
       "      <td>877923b8b</td>\n",
383
       "      <td>0.5</td>\n",
384
       "      <td>0.5</td>\n",
385
       "      <td>0.5</td>\n",
386
       "      <td>0.5</td>\n",
387
       "      <td>0.5</td>\n",
388
       "      <td>0.5</td>\n",
389
       "      <td>6d95084e15</td>\n",
390
       "      <td>ad8ea58f</td>\n",
391
       "      <td>a337baa067</td>\n",
392
       "      <td>30</td>\n",
393
       "      <td>80</td>\n",
394
       "      <td>138.729050</td>\n",
395
       "      <td>-125.0</td>\n",
396
       "      <td>-101.797981</td>\n",
397
       "    </tr>\n",
398
       "    <tr>\n",
399
       "      <th>2</th>\n",
400
       "      <td>a591477cb</td>\n",
401
       "      <td>0.5</td>\n",
402
       "      <td>0.5</td>\n",
403
       "      <td>0.5</td>\n",
404
       "      <td>0.5</td>\n",
405
       "      <td>0.5</td>\n",
406
       "      <td>0.5</td>\n",
407
       "      <td>8e06b2c9e0</td>\n",
408
       "      <td>ecfb278b</td>\n",
409
       "      <td>0cfe838d54</td>\n",
410
       "      <td>30</td>\n",
411
       "      <td>80</td>\n",
412
       "      <td>60.830002</td>\n",
413
       "      <td>-125.0</td>\n",
414
       "      <td>-133.300003</td>\n",
415
       "    </tr>\n",
416
       "    <tr>\n",
417
       "      <th>3</th>\n",
418
       "      <td>42217c898</td>\n",
419
       "      <td>0.5</td>\n",
420
       "      <td>0.5</td>\n",
421
       "      <td>0.5</td>\n",
422
       "      <td>0.5</td>\n",
423
       "      <td>0.5</td>\n",
424
       "      <td>0.5</td>\n",
425
       "      <td>e800f419cf</td>\n",
426
       "      <td>e96e31f4</td>\n",
427
       "      <td>c497ac5bad</td>\n",
428
       "      <td>30</td>\n",
429
       "      <td>80</td>\n",
430
       "      <td>55.388000</td>\n",
431
       "      <td>-125.0</td>\n",
432
       "      <td>-146.081000</td>\n",
433
       "    </tr>\n",
434
       "    <tr>\n",
435
       "      <th>4</th>\n",
436
       "      <td>a130c4d2f</td>\n",
437
       "      <td>0.5</td>\n",
438
       "      <td>0.5</td>\n",
439
       "      <td>0.5</td>\n",
440
       "      <td>0.5</td>\n",
441
       "      <td>0.5</td>\n",
442
       "      <td>0.5</td>\n",
443
       "      <td>faeb7454f3</td>\n",
444
       "      <td>69affa42</td>\n",
445
       "      <td>854e4fbc01</td>\n",
446
       "      <td>30</td>\n",
447
       "      <td>80</td>\n",
448
       "      <td>33.516888</td>\n",
449
       "      <td>-125.0</td>\n",
450
       "      <td>-118.689819</td>\n",
451
       "    </tr>\n",
452
       "  </tbody>\n",
453
       "</table>\n",
454
       "</div>"
455
      ],
456
      "text/plain": [
457
       "   PatientID  epidural  intraparenchymal  intraventricular  subarachnoid  \\\n",
458
       "0  28fbab7eb       0.5               0.5               0.5           0.5   \n",
459
       "1  877923b8b       0.5               0.5               0.5           0.5   \n",
460
       "2  a591477cb       0.5               0.5               0.5           0.5   \n",
461
       "3  42217c898       0.5               0.5               0.5           0.5   \n",
462
       "4  a130c4d2f       0.5               0.5               0.5           0.5   \n",
463
       "\n",
464
       "   subdural  any     SeriesI       PID      StudyI WindowCenter WindowWidth  \\\n",
465
       "0       0.5  0.5  ebfd7e4506  cf1b6b11  93407cadbb           30          80   \n",
466
       "1       0.5  0.5  6d95084e15  ad8ea58f  a337baa067           30          80   \n",
467
       "2       0.5  0.5  8e06b2c9e0  ecfb278b  0cfe838d54           30          80   \n",
468
       "3       0.5  0.5  e800f419cf  e96e31f4  c497ac5bad           30          80   \n",
469
       "4       0.5  0.5  faeb7454f3  69affa42  854e4fbc01           30          80   \n",
470
       "\n",
471
       "   ImagePositionZ  ImagePositionX  ImagePositionY  \n",
472
       "0      158.458000          -125.0     -135.598000  \n",
473
       "1      138.729050          -125.0     -101.797981  \n",
474
       "2       60.830002          -125.0     -133.300003  \n",
475
       "3       55.388000          -125.0     -146.081000  \n",
476
       "4       33.516888          -125.0     -118.689819  "
477
      ]
478
     },
479
     "execution_count": 15,
480
     "metadata": {},
481
     "output_type": "execute_result"
482
    }
483
   ],
484
   "source": [
485
    "# Test-set table, read from the same data directory as the training table.\n",
    "test_df = pd.read_csv(data_dir + 'test.csv')\n",
    "test_df.head()"
487
   ]
488
  },
489
  {
490
   "cell_type": "code",
491
   "execution_count": 16,
492
   "metadata": {},
493
   "outputs": [],
494
   "source": [
495
    "# Folds are drawn over the unique PID values, so every row sharing a PID ends up in the same fold.\n",
    "split_sid = train_df.PID.unique()\n",
    "fold_maker = KFold(n_splits=n_splits, shuffle=True, random_state=SEED)\n",
    "splits = list(fold_maker.split(split_sid))\n"
497
   ]
498
  },
499
  {
500
   "cell_type": "code",
501
   "execution_count": 17,
502
   "metadata": {},
503
   "outputs": [],
504
   "source": [
505
    "# Persist the PID array together with the fold index pairs so that later stages can reuse the\n",
    "# exact same split. The context manager closes the file even if pickling fails.\n",
    "with open(outputs_dir+\"PID_splits_{}.pkl\".format(n_splits),'wb') as pickle_file:\n",
    "    pickle.dump((split_sid,splits),pickle_file,protocol=4)\n"
508
   ]
509
  },
510
  {
511
   "cell_type": "code",
512
   "execution_count": 10,
513
   "metadata": {},
514
   "outputs": [],
515
   "source": [
516
    "def my_loss(y_pred,y_true,weights):\n",
    "    \"\"\"Class-weighted binary cross-entropy on logits, with an optional mixup form.\n",
    "\n",
    "    Args:\n",
    "        y_pred: raw logits.\n",
    "        y_true: targets with the same number of dimensions as ``y_pred``, or with one\n",
    "            extra trailing axis holding (first target, second target, mixing factor).\n",
    "        weights: per-class weights, broadcast against ``y_pred``.\n",
    "\n",
    "    Returns:\n",
    "        Scalar tensor with the mean loss.\n",
    "    \"\"\"\n",
    "    if len(y_pred.shape) != len(y_true.shape):\n",
    "        # Mixup targets (not used in this notebook): blend the two per-element losses\n",
    "        # with the mixing factor stored in the last slot of the trailing axis.\n",
    "        tiled_weights = weights.repeat(y_pred.shape[0], 1)\n",
    "        target_a, target_b, mix = y_true[..., 0], y_true[..., 1], y_true[..., 2]\n",
    "        loss_a = F.binary_cross_entropy_with_logits(y_pred, target_a, tiled_weights, reduction='none')\n",
    "        loss_b = F.binary_cross_entropy_with_logits(y_pred, target_b, tiled_weights, reduction='none')\n",
    "        return (mix * loss_a + (1.0 - mix) * loss_b).mean()\n",
    "\n",
    "    # Plain targets: weighted BCE with the default mean reduction.\n",
    "    return F.binary_cross_entropy_with_logits(y_pred, y_true, weights.expand_as(y_pred))"
526
   ]
527
  },
528
  {
529
   "cell_type": "code",
530
   "execution_count": 11,
531
   "metadata": {},
532
   "outputs": [],
533
   "source": [
534
    "class FocalLoss(nn.Module):\n",
    "    \"\"\"Focal loss built on top of a class-weighted binary cross-entropy.\n",
    "\n",
    "    Each element's BCE is scaled by ``alpha * (1 - pt) ** gamma`` where\n",
    "    ``pt = exp(-BCE)``.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, alpha=1, gamma=2, logits=True, reduce=True):\n",
    "        \"\"\"Store the settings.\n",
    "\n",
    "        Args:\n",
    "            alpha: constant multiplier applied to every element.\n",
    "            gamma: exponent of the ``(1 - pt)`` modulating factor.\n",
    "            logits: if true, inputs are raw logits; otherwise they are passed to\n",
    "                ``F.binary_cross_entropy`` as they are.\n",
    "            reduce: if true, ``forward`` returns the mean; otherwise the per-element tensor.\n",
    "        \"\"\"\n",
    "        super(FocalLoss, self).__init__()\n",
    "        self.alpha, self.gamma = alpha, gamma\n",
    "        self.logits, self.reduce = logits, reduce\n",
    "\n",
    "    def forward(self, y_pred,y_true,weights):\n",
    "        \"\"\"Return the focal loss of ``y_pred`` against ``y_true`` using per-class ``weights``.\"\"\"\n",
    "        bce_fn = F.binary_cross_entropy_with_logits if self.logits else F.binary_cross_entropy\n",
    "        # NOTE(review): the class weights are applied before pt is derived, so they also\n",
    "        # change the modulating factor, not only the scale of the loss.\n",
    "        per_element = bce_fn(y_pred, y_true, weights.expand_as(y_pred), reduction='none')\n",
    "        pt = torch.exp(-per_element)\n",
    "        focal = self.alpha * (1 - pt) ** self.gamma * per_element\n",
    "\n",
    "        if not self.reduce:\n",
    "            return focal\n",
    "        return torch.mean(focal)"
554
   ]
555
  },
556
  {
557
   "cell_type": "code",
558
   "execution_count": 12,
559
   "metadata": {},
560
   "outputs": [],
561
   "source": [
562
    "class parameter_scheduler():\n",
    "    \"\"\"Callable that toggles ``requires_grad`` on a model's parameters by epoch.\n",
    "\n",
    "    Before ``num_epoch`` only parameters whose name contains one of the ``do_first``\n",
    "    substrings are trainable; from ``num_epoch`` on every parameter is trainable.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self,model,do_first=['classifier'],num_epoch=1):\n",
    "        \"\"\"Remember the model, the name fragments to train first and the switch-over epoch.\"\"\"\n",
    "        self.model, self.do_first, self.num_epoch = model, do_first, num_epoch\n",
    "\n",
    "    def __call__(self,epoch):\n",
    "        \"\"\"Apply the freezing rule for ``epoch`` to every named parameter of the model.\"\"\"\n",
    "        train_everything = epoch >= self.num_epoch\n",
    "        for name, param in self.model.named_parameters():\n",
    "            param.requires_grad = train_everything or any(tag in name for tag in self.do_first)\n"
574
   ]
575
  },
576
  {
577
   "cell_type": "code",
578
   "execution_count": 13,
579
   "metadata": {},
580
   "outputs": [],
581
   "source": [
582
    "def get_model(model_name):\n",
    "    \"\"\"Return ``(wrapper, backbone)`` for the requested architecture.\n",
    "\n",
    "    Args:\n",
    "        model_name: architecture name. Names starting with ``'se'`` are looked up in\n",
    "            ``pretrainedmodels``; names containing ``'Densenet161'`` or ``'Densenet169'``\n",
    "            use the matching torchvision DenseNet.\n",
    "\n",
    "    Returns:\n",
    "        Tuple of the wrapper callable (invoked by the notebook as ``wrapper(backbone, ...)``)\n",
    "        and the backbone created with pretrained weights.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if ``model_name`` matches none of the supported families.\n",
    "    \"\"\"\n",
    "    # Use the argument rather than the notebook-global params, so the function honours its input.\n",
    "    if model_name.startswith('se'):\n",
    "        return MySENet, pretrainedmodels.__dict__[model_name](num_classes=1000, pretrained='imagenet')\n",
    "    if 'Densenet161' in model_name:\n",
    "        return partial(MyDenseNet, strategy='none'),models.densenet161(pretrained=True)\n",
    "    if 'Densenet169' in model_name:\n",
    "        return partial(MyDenseNet, strategy='none'),models.densenet169(pretrained=True)\n",
    "    # A bare `raise` has no active exception here and would surface as an unrelated RuntimeError.\n",
    "    raise ValueError('Unsupported model_name: {!r}'.format(model_name))"
591
   ]
592
  },
593
  {
594
   "cell_type": "code",
595
   "execution_count": null,
596
   "metadata": {
597
    "scrolled": false
598
   },
599
   "outputs": [],
600
   "source": [
601
    "%matplotlib nbagg\n",
    "# Train one model per cross-validation fold and save the weights returned as best_model.\n",
    "for num_split in range(params['n_splits']):\n",
    "    # Seed numpy and torch with a fold-specific value.\n",
    "    fold_seed = SEED + num_split\n",
    "    np.random.seed(fold_seed)\n",
    "    torch.manual_seed(fold_seed)\n",
    "    torch.cuda.manual_seed(fold_seed)\n",
    "    #torch.backends.cudnn.deterministic = True\n",
    "\n",
    "    # Index values of the train_df rows whose PID belongs to the train / validation part of the fold.\n",
    "    train_part, validate_part = splits[num_split]\n",
    "    idx_train = train_df[train_df.PID.isin(set(split_sid[train_part]))].index.values\n",
    "    idx_validate = train_df[train_df.PID.isin(set(split_sid[validate_part]))].index.values\n",
    "    idx_train.shape\n",
    "    idx_validate.shape\n",
    "\n",
    "    # Run settings.\n",
    "    klr = 1\n",
    "    batch_size = 32\n",
    "    num_workers = 12\n",
    "    num_epochs = params['num_epochs']\n",
    "    model_name, version = params['model_name'], params['version']\n",
    "\n",
    "    # Wrap the pretrained backbone with the project model class.\n",
    "    new_model, base_model = get_model(model_name)\n",
    "    model = new_model(\n",
    "        base_model,\n",
    "        len(hemorrhage_types),\n",
    "        num_channels=3,\n",
    "        dropout=0.2,\n",
    "        wso=((40, 80), (80, 200), (40, 400)),\n",
    "        dont_do_grad=[],\n",
    "        extra_pool=params['num_pool'],\n",
    "    )\n",
    "    # Optional warm start from weights saved under an earlier version tag.\n",
    "    if params['Pre_version'] is not None:\n",
    "        pretrained_path = models_dir + models_format.format(model_name, params['Pre_version'], num_split)\n",
    "        model.load_state_dict(torch.load(pretrained_path, map_location=torch.device(device)))\n",
    "\n",
    "    _ = model.to(device)\n",
    "    # Per-class loss weights; the last class counts double.\n",
    "    weights = torch.tensor([1., 1., 1., 1., 1., 2.], device=device)\n",
    "    loss_func = FocalLoss() if params['focal'] else my_loss\n",
    "\n",
    "    # Label tensor plus two image datasets: one with the augmentation settings below,\n",
    "    # one whose MyTransform only sets out_size (used for validation).\n",
    "    targets_dataset = D.TensorDataset(torch.tensor(train_df[hemorrhage_types].values, dtype=torch.float))\n",
    "    transform = MyTransform(\n",
    "        mean_change=15,\n",
    "        std_change=0,\n",
    "        flip=True,\n",
    "        zoom=(0.2, 0.2),\n",
    "        rotate=30,\n",
    "        out_size=512,\n",
    "        shift=10,\n",
    "        normal=False,\n",
    "    )\n",
    "    imagedataset = ImageDataset(\n",
    "        train_df,\n",
    "        transform=transform.random,\n",
    "        base_path=train_images_dir,\n",
    "        window_eq=False,\n",
    "        equalize=False,\n",
    "        rescale=True,\n",
    "    )\n",
    "    transform_val = MyTransform(out_size=512)\n",
    "    imagedataset_val = ImageDataset(\n",
    "        train_df,\n",
    "        transform=transform_val.random,\n",
    "        base_path=train_images_dir,\n",
    "        window_eq=False,\n",
    "        equalize=False,\n",
    "        rescale=True,\n",
    "    )\n",
    "    combined_dataset = DatasetCat([imagedataset, targets_dataset])\n",
    "    combined_dataset_val = DatasetCat([imagedataset_val, targets_dataset])\n",
    "    optimizer_grouped_parameters = model.get_optimizer_parameters(klr)\n",
    "\n",
    "    # NOTE(review): `sampling` only feeds the step-count estimate below, while model_train\n",
    "    # is called with sampler=None -- confirm that this is intended.\n",
    "    sampling = sampler(train_df[hemorrhage_types].values[idx_train], 0.5, [0, 0, 0, 0, 0, 1])\n",
    "    sample_ratio = 1.02 * float(sampling().shape[0]) / idx_train.shape[0]\n",
    "    train_dataset = D.Subset(combined_dataset, idx_train)\n",
    "    validate_dataset = D.Subset(combined_dataset_val, idx_validate)\n",
    "\n",
    "    # Total optimizer steps over all epochs, used by the learning-rate schedule and the live graph.\n",
    "    n_train = len(train_dataset)\n",
    "    full_batches = sample_ratio * n_train // batch_size\n",
    "    has_partial_batch = int(n_train % batch_size > 0)\n",
    "    num_train_optimization_steps = num_epochs * (full_batches + has_partial_batch)\n",
    "\n",
    "    fig, ax = plt.subplots(figsize=(10, 7))\n",
    "    gr = loss_graph(fig, ax, num_epochs, int(num_train_optimization_steps / num_epochs) + 1, limits=(0.05, 0.2))\n",
    "    sched = WarmupExpCosineWithWarmupRestartsSchedule(t_total=num_train_optimization_steps, cycles=num_epochs, tau=1)\n",
    "    optimizer = BertAdam(optimizer_grouped_parameters, lr=klr * 1e-3, schedule=sched)\n",
    "    # Mixed precision is set up here, hence do_apex=False / model_apexed=True in the call below.\n",
    "    model, optimizer = amp.initialize(model, optimizer, opt_level=\"O1\", verbosity=0)\n",
    "    history, best_model = model_train(\n",
    "        model,\n",
    "        optimizer,\n",
    "        train_dataset,\n",
    "        batch_size,\n",
    "        num_epochs,\n",
    "        loss_func,\n",
    "        weights=weights,\n",
    "        do_apex=False,\n",
    "        model_apexed=True,\n",
    "        validate_dataset=validate_dataset,\n",
    "        param_schedualer=None,\n",
    "        weights_data=None,\n",
    "        metric=None,\n",
    "        return_model=True,\n",
    "        num_workers=num_workers,\n",
    "        sampler=None,\n",
    "        pre_process=None,\n",
    "        graph=gr,\n",
    "        call_progress=sendmeemail,\n",
    "    )\n",
    "\n",
    "    # One weight file per (model name, version, fold).\n",
    "    torch.save(best_model.state_dict(), models_dir + models_format.format(model_name, version, num_split))"
681
   ]
682
  },
683
  {
684
   "cell_type": "code",
685
   "execution_count": null,
686
   "metadata": {},
687
   "outputs": [],
688
   "source": [
689
    "# For every fold: reload the fold's saved weights, run the model over the whole training table\n",
    "# with n_tta randomly augmented copies of each row, and pickle the features and predictions.\n",
    "n_tta = 4  # augmented copies per row; must agree with the index arithmetic at the end of the loop\n",
    "for num_split in range(params['n_splits']):\n",
    "    idx_validate =  train_df[train_df.PID.isin(set(split_sid[splits[num_split][1]]))].index.values\n",
    "    model_name,version =params['model_name'] , params['version']\n",
    "    new_model,base_model=get_model(params['model_name'])\n",
    "    model =  new_model(base_model,\n",
    "                       len(hemorrhage_types),\n",
    "                       num_channels=3,\n",
    "                       dropout=0.2,\n",
    "                       wso=((40,80),(80,200),(40,400)),\n",
    "                       dont_do_grad=[],\n",
    "                       extra_pool=params['num_pool'],\n",
    "                       )\n",
    "    model.load_state_dict(torch.load(models_dir+models_format.format(model_name,version,num_split),map_location=torch.device(device)))\n",
    "    _=model.to(device)\n",
    "    transform=MyTransform(mean_change=15,\n",
    "                          std_change=0,\n",
    "                          flip=True,\n",
    "                          zoom=(0.2,0.2),\n",
    "                          rotate=30,\n",
    "                          out_size=512,\n",
    "                          shift=0,\n",
    "                          normal=False)\n",
    "    # Row i of train_df occupies positions n_tta*i .. n_tta*i + n_tta - 1 of the dataset.\n",
    "    indexes=np.arange(train_df.shape[0]).repeat(n_tta)\n",
    "    train_dataset=D.Subset(ImageDataset(train_df,transform=transform.random,base_path=train_images_dir,\n",
    "                              window_eq=False,equalize=False,rescale=True),indexes)\n",
    "    pred,features = model_run(model,train_dataset,do_apex=True,batch_size=96,num_workers=14)\n",
    "\n",
    "    # Context managers close the files even if pickling raises.\n",
    "    with open(outputs_dir+outputs_format.format(model_name,version,params['train_features'],num_split),'wb') as pickle_file:\n",
    "        pickle.dump(features,pickle_file,protocol=4)\n",
    "\n",
    "    with open(outputs_dir+outputs_format.format(model_name,version,params['train_prediction'],num_split),'wb') as pickle_file:\n",
    "        pickle.dump(pred,pickle_file,protocol=4)\n",
    "\n",
    "    # Validation loss of the fold: average the n_tta predictions of each validation row, then score.\n",
    "    # Assumes pred has one row per dataset item -- TODO confirm against model_run.\n",
    "    # NOTE(review): this bare expression is only shown if the notebook's\n",
    "    # ast_node_interactivity = \"all\" setting is active -- confirm.\n",
    "    my_loss(pred[(idx_validate*n_tta+np.arange(n_tta)[:,None]).transpose(1,0)].mean(1),\n",
    "            torch.tensor(train_df[hemorrhage_types].values[idx_validate],dtype=torch.float),\n",
    "            torch.tensor([1.,1.,1.,1.,1.,2.]))"
728
   ]
729
  },
730
  {
731
   "cell_type": "code",
732
   "execution_count": null,
733
   "metadata": {},
734
   "outputs": [],
735
   "source": [
736
    "# For every fold: reload the fold's saved weights, run the model over the test table with\n",
    "# n_tta randomly augmented copies of each row, and pickle the features and predictions.\n",
    "n_tta = 8  # augmented copies per test row\n",
    "for num_split in range(params['n_splits']):\n",
    "    # NOTE(review): idx_validate is not used in this cell; it is kept because it is a\n",
    "    # notebook-global that other cells may read -- confirm and remove if unused.\n",
    "    idx_validate =  train_df[train_df.PID.isin(set(split_sid[splits[num_split][1]]))].index.values\n",
    "    model_name,version =params['model_name'] , params['version']\n",
    "    new_model,base_model=get_model(params['model_name'])\n",
    "    model =  new_model(base_model,\n",
    "                       len(hemorrhage_types),\n",
    "                       num_channels=3,\n",
    "                       dropout=0.2,\n",
    "                       wso=((40,80),(80,200),(40,400)),\n",
    "                       dont_do_grad=[],\n",
    "                       extra_pool=params['num_pool'],\n",
    "                       )\n",
    "    model.load_state_dict(torch.load(models_dir+models_format.format(model_name,version,num_split),map_location=torch.device(device)))\n",
    "    _=model.to(device)\n",
    "    transform=MyTransform(mean_change=15,\n",
    "                          std_change=0,\n",
    "                          flip=True,\n",
    "                          zoom=(0.2,0.2),\n",
    "                          rotate=30,\n",
    "                          out_size=512,\n",
    "                          shift=0,\n",
    "                          normal=False)\n",
    "    # Row i of test_df occupies positions n_tta*i .. n_tta*i + n_tta - 1 of the dataset.\n",
    "    indexes=np.arange(test_df.shape[0]).repeat(n_tta)\n",
    "    imagedataset_test=D.Subset(ImageDataset(test_df,transform=transform.random,base_path=test_images_dir,\n",
    "                                  window_eq=False,equalize=False,rescale=True),indexes)\n",
    "    pred,features = model_run(model,imagedataset_test,do_apex=True,batch_size=96,num_workers=18)\n",
    "\n",
    "    # Context managers close the files even if pickling raises.\n",
    "    with open(outputs_dir+outputs_format.format(model_name,version,params['test_features'],num_split),'wb') as pickle_file:\n",
    "        pickle.dump(features,pickle_file,protocol=4)\n",
    "\n",
    "    with open(outputs_dir+outputs_format.format(model_name,version,params['test_prediction'],num_split),'wb') as pickle_file:\n",
    "        pickle.dump(pred,pickle_file,protocol=4)\n"
769
   ]
770
  },
771
  {
772
   "cell_type": "markdown",
773
   "metadata": {},
774
   "source": [
775
    "## Create submission file (for reference)"
776
   ]
777
  },
778
  {
779
   "cell_type": "code",
780
   "execution_count": 33,
781
   "metadata": {},
782
   "outputs": [
783
    {
784
     "data": {
785
      "application/vnd.jupyter.widget-view+json": {
786
       "model_id": "3ae2e5d2ec4d4919b47ab1bf3482807c",
787
       "version_major": 2,
788
       "version_minor": 0
789
      },
790
      "text/plain": [
791
       "HBox(children=(IntProgress(value=0, max=3), HTML(value='')))"
792
      ]
793
     },
794
     "metadata": {},
795
     "output_type": "display_data"
796
    },
797
    {
798
     "data": {
799
      "text/plain": [
800
       "torch.Size([78545, 24, 6])"
801
      ]
802
     },
803
     "execution_count": 33,
804
     "metadata": {},
805
     "output_type": "execute_result"
806
    }
807
   ],
808
   "source": [
809
    "# Must equal the per-image repeat count used when the test predictions were\n",
    "# written (the inference cell repeats every test index 8 times).\n",
    "n_tta = 8\n",
    "model_name, version = params['model_name'], params['version']\n",
    "\n",
    "preds=[]\n",
    "for num_split in tqdm_notebook(range(params['n_splits'])):\n",
    "    prediction_path = outputs_dir+outputs_format.format(model_name,version,params['test_prediction'],num_split)\n",
    "    # These pickles are written by this notebook; never point this at untrusted files.\n",
    "    with open(prediction_path,'rb') as pickle_file:\n",
    "        pred=pickle.load(pickle_file)\n",
    "    # Rows are stored image-major (n_tta consecutive rows per image), so an\n",
    "    # (n_images, n_tta) index grid regroups them to (n_images, n_tta, ...).\n",
    "    grouped_rows = np.arange(pred.shape[0]).reshape(pred.shape[0]//n_tta, n_tta)\n",
    "    preds.append(pred[grouped_rows])\n",
    "\n",
    "# Stack the splits along the per-image axis: (n_images, n_splits*n_tta, ...).\n",
    "predss = torch.cat(preds,1)\n",
    "predss.shape"
818
   ]
819
  },
820
  {
821
   "cell_type": "code",
822
   "execution_count": 36,
823
   "metadata": {},
824
   "outputs": [
825
    {
826
     "data": {
827
      "text/html": [
828
       "<div>\n",
829
       "<style scoped>\n",
830
       "    .dataframe tbody tr th:only-of-type {\n",
831
       "        vertical-align: middle;\n",
832
       "    }\n",
833
       "\n",
834
       "    .dataframe tbody tr th {\n",
835
       "        vertical-align: top;\n",
836
       "    }\n",
837
       "\n",
838
       "    .dataframe thead th {\n",
839
       "        text-align: right;\n",
840
       "    }\n",
841
       "</style>\n",
842
       "<table border=\"1\" class=\"dataframe\">\n",
843
       "  <thead>\n",
844
       "    <tr style=\"text-align: right;\">\n",
845
       "      <th></th>\n",
846
       "      <th>ID</th>\n",
847
       "      <th>Label</th>\n",
848
       "    </tr>\n",
849
       "  </thead>\n",
850
       "  <tbody>\n",
851
       "    <tr>\n",
852
       "      <th>0</th>\n",
853
       "      <td>ID_000012eaf_any</td>\n",
854
       "      <td>0.012404</td>\n",
855
       "    </tr>\n",
856
       "    <tr>\n",
857
       "      <th>1</th>\n",
858
       "      <td>ID_000012eaf_epidural</td>\n",
859
       "      <td>0.000464</td>\n",
860
       "    </tr>\n",
861
       "    <tr>\n",
862
       "      <th>2</th>\n",
863
       "      <td>ID_000012eaf_intraparenchymal</td>\n",
864
       "      <td>0.001818</td>\n",
865
       "    </tr>\n",
866
       "    <tr>\n",
867
       "      <th>3</th>\n",
868
       "      <td>ID_000012eaf_intraventricular</td>\n",
869
       "      <td>0.000576</td>\n",
870
       "    </tr>\n",
871
       "    <tr>\n",
872
       "      <th>4</th>\n",
873
       "      <td>ID_000012eaf_subarachnoid</td>\n",
874
       "      <td>0.001655</td>\n",
875
       "    </tr>\n",
876
       "    <tr>\n",
877
       "      <th>5</th>\n",
878
       "      <td>ID_000012eaf_subdural</td>\n",
879
       "      <td>0.010707</td>\n",
880
       "    </tr>\n",
881
       "    <tr>\n",
882
       "      <th>6</th>\n",
883
       "      <td>ID_0000ca2f6_any</td>\n",
884
       "      <td>0.002507</td>\n",
885
       "    </tr>\n",
886
       "    <tr>\n",
887
       "      <th>7</th>\n",
888
       "      <td>ID_0000ca2f6_epidural</td>\n",
889
       "      <td>0.000038</td>\n",
890
       "    </tr>\n",
891
       "    <tr>\n",
892
       "      <th>8</th>\n",
893
       "      <td>ID_0000ca2f6_intraparenchymal</td>\n",
894
       "      <td>0.000540</td>\n",
895
       "    </tr>\n",
896
       "    <tr>\n",
897
       "      <th>9</th>\n",
898
       "      <td>ID_0000ca2f6_intraventricular</td>\n",
899
       "      <td>0.000080</td>\n",
900
       "    </tr>\n",
901
       "    <tr>\n",
902
       "      <th>10</th>\n",
903
       "      <td>ID_0000ca2f6_subarachnoid</td>\n",
904
       "      <td>0.000490</td>\n",
905
       "    </tr>\n",
906
       "    <tr>\n",
907
       "      <th>11</th>\n",
908
       "      <td>ID_0000ca2f6_subdural</td>\n",
909
       "      <td>0.001157</td>\n",
910
       "    </tr>\n",
911
       "  </tbody>\n",
912
       "</table>\n",
913
       "</div>"
914
      ],
915
      "text/plain": [
916
       "                               ID     Label\n",
917
       "0                ID_000012eaf_any  0.012404\n",
918
       "1           ID_000012eaf_epidural  0.000464\n",
919
       "2   ID_000012eaf_intraparenchymal  0.001818\n",
920
       "3   ID_000012eaf_intraventricular  0.000576\n",
921
       "4       ID_000012eaf_subarachnoid  0.001655\n",
922
       "5           ID_000012eaf_subdural  0.010707\n",
923
       "6                ID_0000ca2f6_any  0.002507\n",
924
       "7           ID_0000ca2f6_epidural  0.000038\n",
925
       "8   ID_0000ca2f6_intraparenchymal  0.000540\n",
926
       "9   ID_0000ca2f6_intraventricular  0.000080\n",
927
       "10      ID_0000ca2f6_subarachnoid  0.000490\n",
928
       "11          ID_0000ca2f6_subdural  0.001157"
929
      ]
930
     },
931
     "execution_count": 36,
932
     "metadata": {},
933
     "output_type": "execute_result"
934
    },
935
    {
936
     "data": {
937
      "text/plain": [
938
       "(471270, 2)"
939
      ]
940
     },
941
     "execution_count": 36,
942
     "metadata": {},
943
     "output_type": "execute_result"
944
    }
945
   ],
946
   "source": [
947
    "# Average the sigmoid probabilities over axis 1 of `predss` (all stacked\n",
    "# per-image predictions) and let the helper build the ID/Label table.\n",
    "submission_df=get_submission(test_df,torch.sigmoid(predss).mean(1),False)\n",
    "submission_df.head(12)\n",
    "submission_df.shape\n",
    "\n",
    "# Output location; machine-specific, adjust when running elsewhere.\n",
    "sub_num=999\n",
    "submissions_dir='/media/hd/notebooks/data/RSNA/submissions/'\n",
    "# to_csv does not create missing directories, so make sure the target exists.\n",
    "os.makedirs(submissions_dir,exist_ok=True)\n",
    "submission_df.to_csv(os.path.join(submissions_dir,'submission{}.csv'.format(sub_num)),\n",
    "                     index=False, columns=['ID','Label'])\n"
953
   ]
954
  },
955
  {
956
   "cell_type": "code",
957
   "execution_count": null,
958
   "metadata": {},
959
   "outputs": [],
960
   "source": []
961
  }
962
 ],
963
 "metadata": {
964
  "kernelspec": {
965
   "display_name": "Python 3",
966
   "language": "python",
967
   "name": "python3"
968
  },
969
  "language_info": {
970
   "codemirror_mode": {
971
    "name": "ipython",
972
    "version": 3
973
   },
974
   "file_extension": ".py",
975
   "mimetype": "text/x-python",
976
   "name": "python",
977
   "nbconvert_exporter": "python",
978
   "pygments_lexer": "ipython3",
979
   "version": "3.6.6"
980
  }
981
 },
982
 "nbformat": 4,
983
 "nbformat_minor": 2
984
}