Diff of /eda/inference.ipynb [000000] .. [fb2ce2]

Switch to unified view

a b/eda/inference.ipynb
1
{
2
 "cells": [
3
  {
4
   "cell_type": "code",
5
   "execution_count": null,
6
   "metadata": {},
7
   "outputs": [],
8
   "source": []
9
  },
10
  {
11
   "cell_type": "code",
12
   "execution_count": null,
13
   "metadata": {},
14
   "outputs": [],
15
   "source": []
16
  },
17
  {
18
   "cell_type": "code",
19
   "execution_count": null,
20
   "metadata": {},
21
   "outputs": [],
22
   "source": []
23
  },
24
  {
25
   "cell_type": "code",
26
   "execution_count": null,
27
   "metadata": {},
28
   "outputs": [],
29
   "source": []
30
  },
31
  {
32
   "cell_type": "code",
33
   "execution_count": null,
34
   "metadata": {},
35
   "outputs": [],
36
   "source": []
37
  },
38
  {
39
   "cell_type": "code",
40
   "execution_count": null,
41
   "metadata": {},
42
   "outputs": [],
43
   "source": []
44
  },
45
  {
46
   "cell_type": "code",
47
   "execution_count": null,
48
   "metadata": {},
49
   "outputs": [],
50
   "source": []
51
  },
52
  {
53
   "cell_type": "code",
54
   "execution_count": null,
55
   "metadata": {},
56
   "outputs": [],
57
   "source": []
58
  },
59
  {
60
   "cell_type": "code",
61
   "execution_count": 1,
62
   "metadata": {},
63
   "outputs": [],
64
   "source": [
65
    "import os\n",
66
    "import sys\n",
67
    "import argparse\n",
68
    "import tensorflow as tf\n",
69
    "from tensorflow import keras\n",
70
    "import pandas as pd\n",
71
    "from data_loader import read_trainset, DataGenerator\n",
72
    "import parse_config"
73
   ]
74
  },
75
  {
76
   "cell_type": "code",
77
   "execution_count": 6,
78
   "metadata": {},
79
   "outputs": [],
80
   "source": [
81
    "# comment out if using tensorflow 2.x\n",
82
    "if parse_config.USING_RTX_20XX:\n",
83
    "    config = tf.compat.v1.ConfigProto()\n",
84
    "    config.gpu_options.allow_growth = True\n",
85
    "    tf.compat.v1.keras.backend.set_session(tf.compat.v1.Session(config=config))"
86
   ]
87
  },
88
  {
89
   "cell_type": "code",
90
   "execution_count": 65,
91
   "metadata": {},
92
   "outputs": [],
93
   "source": [
94
    "MODEL_NAME = '../models/epoch3.hdf5'\n",
95
    "img_size = (256,256,3)\n",
96
    "batch_size=16\n",
97
    "\n",
98
    "test_images_dir = '/media/keil/baltar/intracranial-hemorrhage-detection-data/stage_2_test_images/'\n",
99
    "testset_filename = \"../submissions/stage_2_sample_submission.csv\""
100
   ]
101
  },
102
  {
103
   "cell_type": "code",
104
   "execution_count": 9,
105
   "metadata": {},
106
   "outputs": [],
107
   "source": [
108
    "def read_testset(filename):\n",
109
    "    \"\"\" Read the submission sample csv\n",
110
    "        Args:\n",
111
    "            filename (str): Filename of the sample submission \n",
112
    "        Returns:\n",
113
    "            df (panda dataframe):  Return a dataframe for inference.  \n",
114
    "\n",
115
    "     \"\"\"\n",
116
    "    df = pd.read_csv(filename)\n",
117
    "    df[\"Image\"] = df[\"ID\"].str.slice(stop=12)\n",
118
    "    df[\"Diagnosis\"] = df[\"ID\"].str.slice(start=13)\n",
119
    "\n",
120
    "    df = df.loc[:, [\"Label\", \"Diagnosis\", \"Image\"]]\n",
121
    "    df = df.set_index(['Image', 'Diagnosis']).unstack(level=-1)\n",
122
    "\n",
123
    "    return df\n",
124
    "\n",
125
    "def create_submission(model, data, test_df):\n",
126
    "\n",
127
    "    print('+'*50)\n",
128
    "    print(\"Creating predictions on test dataset\")\n",
129
    "    pred = model.predict_generator(data, verbose=1)\n",
130
    "    out_df = pd.DataFrame(pred, index=test_df.index, columns=test_df.columns)\n",
131
    "    test_df = out_df.stack().reset_index()\n",
132
    "    test_df.insert(loc=0, column='ID', value=test_df['Image'].astype(str) + \"_\" + test_df['Diagnosis'])\n",
133
    "    test_df = test_df.drop([\"Image\", \"Diagnosis\"], axis=1)\n",
134
    "    print(\"Saving submissions to submission.csv\")\n",
135
    "    test_df.to_csv('../submissions/stage2-final-submission-v2.csv', index=False)\n",
136
    "\n",
137
    "    return test_df"
138
   ]
139
  },
140
  {
141
   "cell_type": "code",
142
   "execution_count": 66,
143
   "metadata": {},
144
   "outputs": [],
145
   "source": [
146
    "test_df = read_testset(testset_filename)\n",
147
    "test_generator = DataGenerator(list_IDs = test_df.index, \n",
148
    "                                batch_size = batch_size,\n",
149
    "                                img_size = img_size,\n",
150
    "                                img_dir = test_images_dir)\n",
151
    "best_model = keras.models.load_model(MODEL_NAME, compile=False)"
152
   ]
153
  },
154
  {
155
   "cell_type": "code",
156
   "execution_count": 67,
157
   "metadata": {},
158
   "outputs": [
159
    {
160
     "data": {
161
      "text/html": [
162
       "<div>\n",
163
       "<style scoped>\n",
164
       "    .dataframe tbody tr th:only-of-type {\n",
165
       "        vertical-align: middle;\n",
166
       "    }\n",
167
       "\n",
168
       "    .dataframe tbody tr th {\n",
169
       "        vertical-align: top;\n",
170
       "    }\n",
171
       "\n",
172
       "    .dataframe thead tr th {\n",
173
       "        text-align: left;\n",
174
       "    }\n",
175
       "\n",
176
       "    .dataframe thead tr:last-of-type th {\n",
177
       "        text-align: right;\n",
178
       "    }\n",
179
       "</style>\n",
180
       "<table border=\"1\" class=\"dataframe\">\n",
181
       "  <thead>\n",
182
       "    <tr>\n",
183
       "      <th></th>\n",
184
       "      <th colspan=\"6\" halign=\"left\">Label</th>\n",
185
       "    </tr>\n",
186
       "    <tr>\n",
187
       "      <th>Diagnosis</th>\n",
188
       "      <th>any</th>\n",
189
       "      <th>epidural</th>\n",
190
       "      <th>intraparenchymal</th>\n",
191
       "      <th>intraventricular</th>\n",
192
       "      <th>subarachnoid</th>\n",
193
       "      <th>subdural</th>\n",
194
       "    </tr>\n",
195
       "    <tr>\n",
196
       "      <th>Image</th>\n",
197
       "      <th></th>\n",
198
       "      <th></th>\n",
199
       "      <th></th>\n",
200
       "      <th></th>\n",
201
       "      <th></th>\n",
202
       "      <th></th>\n",
203
       "    </tr>\n",
204
       "  </thead>\n",
205
       "  <tbody>\n",
206
       "    <tr>\n",
207
       "      <th>ID_000000e27</th>\n",
208
       "      <td>0.5</td>\n",
209
       "      <td>0.5</td>\n",
210
       "      <td>0.5</td>\n",
211
       "      <td>0.5</td>\n",
212
       "      <td>0.5</td>\n",
213
       "      <td>0.5</td>\n",
214
       "    </tr>\n",
215
       "    <tr>\n",
216
       "      <th>ID_000009146</th>\n",
217
       "      <td>0.5</td>\n",
218
       "      <td>0.5</td>\n",
219
       "      <td>0.5</td>\n",
220
       "      <td>0.5</td>\n",
221
       "      <td>0.5</td>\n",
222
       "      <td>0.5</td>\n",
223
       "    </tr>\n",
224
       "    <tr>\n",
225
       "      <th>ID_00007b8cb</th>\n",
226
       "      <td>0.5</td>\n",
227
       "      <td>0.5</td>\n",
228
       "      <td>0.5</td>\n",
229
       "      <td>0.5</td>\n",
230
       "      <td>0.5</td>\n",
231
       "      <td>0.5</td>\n",
232
       "    </tr>\n",
233
       "    <tr>\n",
234
       "      <th>ID_000134952</th>\n",
235
       "      <td>0.5</td>\n",
236
       "      <td>0.5</td>\n",
237
       "      <td>0.5</td>\n",
238
       "      <td>0.5</td>\n",
239
       "      <td>0.5</td>\n",
240
       "      <td>0.5</td>\n",
241
       "    </tr>\n",
242
       "    <tr>\n",
243
       "      <th>ID_000176f2a</th>\n",
244
       "      <td>0.5</td>\n",
245
       "      <td>0.5</td>\n",
246
       "      <td>0.5</td>\n",
247
       "      <td>0.5</td>\n",
248
       "      <td>0.5</td>\n",
249
       "      <td>0.5</td>\n",
250
       "    </tr>\n",
251
       "  </tbody>\n",
252
       "</table>\n",
253
       "</div>"
254
      ],
255
      "text/plain": [
256
       "             Label                                                          \\\n",
257
       "Diagnosis      any epidural intraparenchymal intraventricular subarachnoid   \n",
258
       "Image                                                                        \n",
259
       "ID_000000e27   0.5      0.5              0.5              0.5          0.5   \n",
260
       "ID_000009146   0.5      0.5              0.5              0.5          0.5   \n",
261
       "ID_00007b8cb   0.5      0.5              0.5              0.5          0.5   \n",
262
       "ID_000134952   0.5      0.5              0.5              0.5          0.5   \n",
263
       "ID_000176f2a   0.5      0.5              0.5              0.5          0.5   \n",
264
       "\n",
265
       "                       \n",
266
       "Diagnosis    subdural  \n",
267
       "Image                  \n",
268
       "ID_000000e27      0.5  \n",
269
       "ID_000009146      0.5  \n",
270
       "ID_00007b8cb      0.5  \n",
271
       "ID_000134952      0.5  \n",
272
       "ID_000176f2a      0.5  "
273
      ]
274
     },
275
     "execution_count": 67,
276
     "metadata": {},
277
     "output_type": "execute_result"
278
    }
279
   ],
280
   "source": [
281
    "#test_df shape: (121232, 6) -- 121232 files in stage_2_test via keil$ ls -1 stage_2_test_images/ | wc -l | less\n",
282
    "assert len(test_generator.indices) == len(test_df == len(test_generator.list_IDs)) #checks out\n",
283
    "\n",
284
    "\n",
285
    "test_df.head()"
286
   ]
287
  },
288
  {
289
   "cell_type": "markdown",
290
   "metadata": {},
291
   "source": [
292
    "What is going on is the batch size is not evenly divisable by the img count in the test2_stage of 121232/batch of 20 = remainder of 8 images thus the size of 121240 which I was seeing. Confirming now by using a batchsize of 16 which is evenly divisible... will confirm again via batch size = 1"
293
   ]
294
  },
295
  {
296
   "cell_type": "code",
297
   "execution_count": 68,
298
   "metadata": {},
299
   "outputs": [
300
    {
301
     "name": "stdout",
302
     "output_type": "stream",
303
     "text": [
304
      "7577/7577 [==============================] - 6483s 856ms/step\n"
305
     ]
306
    }
307
   ],
308
   "source": [
309
    "# step through the functon line by line:\n",
310
    "\n",
311
    "# create_submission(best_model, test_generator, test_df)\n",
312
    "# def create_submission(model, data, test_df):\n",
313
    "\n",
314
    "pred_batch16 = best_model.predict_generator(test_generator, verbose=1)"
315
   ]
316
  },
317
  {
318
   "cell_type": "code",
319
   "execution_count": 69,
320
   "metadata": {},
321
   "outputs": [
322
    {
323
     "data": {
324
      "text/plain": [
325
       "(121232, 6)"
326
      ]
327
     },
328
     "execution_count": 69,
329
     "metadata": {},
330
     "output_type": "execute_result"
331
    }
332
   ],
333
   "source": [
334
    "pred_batch16.shape #good to go.... :D ffs"
335
   ]
336
  },
337
  {
338
   "cell_type": "code",
339
   "execution_count": 71,
340
   "metadata": {},
341
   "outputs": [],
342
   "source": [
343
    "# After getting predictions here is some pandas gymnastics...\n",
344
    "out_df = pd.DataFrame(pred_batch16, index=test_df.index, columns=test_df.columns)\n",
345
    "\n",
346
    "\n",
347
    "test_df = out_df.stack().reset_index()\n",
348
    "\n",
349
    "\n",
350
    "test_df.insert(loc=0, column='ID', value=test_df['Image'].astype(str) + \"_\" + test_df['Diagnosis'])\n",
351
    "\n",
352
    "\n",
353
    "test_df = test_df.drop([\"Image\", \"Diagnosis\"], axis=1)\n",
354
    "\n",
355
    "\n",
356
    "test_df.to_csv('../submissions/stage2-final-submission-v2.csv', index=False)\n"
357
   ]
358
  },
359
  {
360
   "cell_type": "code",
361
   "execution_count": null,
362
   "metadata": {},
363
   "outputs": [],
364
   "source": []
365
  },
366
  {
367
   "cell_type": "code",
368
   "execution_count": null,
369
   "metadata": {},
370
   "outputs": [],
371
   "source": []
372
  },
373
  {
374
   "cell_type": "code",
375
   "execution_count": 70,
376
   "metadata": {},
377
   "outputs": [
378
    {
379
     "data": {
380
      "text/plain": [
381
       "(121240, 6)"
382
      ]
383
     },
384
     "execution_count": 70,
385
     "metadata": {},
386
     "output_type": "execute_result"
387
    }
388
   ],
389
   "source": [
390
    "pred.shape"
391
   ]
392
  },
393
  {
394
   "cell_type": "code",
395
   "execution_count": 59,
396
   "metadata": {},
397
   "outputs": [],
398
   "source": [
399
    "temp_df = pd.DataFrame(pred)\n",
400
    "temp_df.to_csv('./temp_csv.csv')"
401
   ]
402
  },
403
  {
404
   "cell_type": "code",
405
   "execution_count": 58,
406
   "metadata": {},
407
   "outputs": [
408
    {
409
     "data": {
410
      "text/html": [
411
       "<div>\n",
412
       "<style scoped>\n",
413
       "    .dataframe tbody tr th:only-of-type {\n",
414
       "        vertical-align: middle;\n",
415
       "    }\n",
416
       "\n",
417
       "    .dataframe tbody tr th {\n",
418
       "        vertical-align: top;\n",
419
       "    }\n",
420
       "\n",
421
       "    .dataframe thead th {\n",
422
       "        text-align: right;\n",
423
       "    }\n",
424
       "</style>\n",
425
       "<table border=\"1\" class=\"dataframe\">\n",
426
       "  <thead>\n",
427
       "    <tr style=\"text-align: right;\">\n",
428
       "      <th></th>\n",
429
       "      <th>0</th>\n",
430
       "      <th>1</th>\n",
431
       "      <th>2</th>\n",
432
       "      <th>3</th>\n",
433
       "      <th>4</th>\n",
434
       "      <th>5</th>\n",
435
       "    </tr>\n",
436
       "  </thead>\n",
437
       "  <tbody>\n",
438
       "    <tr>\n",
439
       "      <th>0</th>\n",
440
       "      <td>0.117452</td>\n",
441
       "      <td>0.000942</td>\n",
442
       "      <td>0.067592</td>\n",
443
       "      <td>0.000453</td>\n",
444
       "      <td>0.052313</td>\n",
445
       "      <td>0.011529</td>\n",
446
       "    </tr>\n",
447
       "    <tr>\n",
448
       "      <th>1</th>\n",
449
       "      <td>0.001256</td>\n",
450
       "      <td>0.000010</td>\n",
451
       "      <td>0.000121</td>\n",
452
       "      <td>0.000128</td>\n",
453
       "      <td>0.000440</td>\n",
454
       "      <td>0.000986</td>\n",
455
       "    </tr>\n",
456
       "    <tr>\n",
457
       "      <th>2</th>\n",
458
       "      <td>0.002467</td>\n",
459
       "      <td>0.000215</td>\n",
460
       "      <td>0.003454</td>\n",
461
       "      <td>0.000158</td>\n",
462
       "      <td>0.000787</td>\n",
463
       "      <td>0.001039</td>\n",
464
       "    </tr>\n",
465
       "    <tr>\n",
466
       "      <th>3</th>\n",
467
       "      <td>0.002803</td>\n",
468
       "      <td>0.000091</td>\n",
469
       "      <td>0.000339</td>\n",
470
       "      <td>0.000042</td>\n",
471
       "      <td>0.001047</td>\n",
472
       "      <td>0.001354</td>\n",
473
       "    </tr>\n",
474
       "    <tr>\n",
475
       "      <th>4</th>\n",
476
       "      <td>0.002144</td>\n",
477
       "      <td>0.000046</td>\n",
478
       "      <td>0.000286</td>\n",
479
       "      <td>0.000154</td>\n",
480
       "      <td>0.000292</td>\n",
481
       "      <td>0.002259</td>\n",
482
       "    </tr>\n",
483
       "  </tbody>\n",
484
       "</table>\n",
485
       "</div>"
486
      ],
487
      "text/plain": [
488
       "          0         1         2         3         4         5\n",
489
       "0  0.117452  0.000942  0.067592  0.000453  0.052313  0.011529\n",
490
       "1  0.001256  0.000010  0.000121  0.000128  0.000440  0.000986\n",
491
       "2  0.002467  0.000215  0.003454  0.000158  0.000787  0.001039\n",
492
       "3  0.002803  0.000091  0.000339  0.000042  0.001047  0.001354\n",
493
       "4  0.002144  0.000046  0.000286  0.000154  0.000292  0.002259"
494
      ]
495
     },
496
     "execution_count": 58,
497
     "metadata": {},
498
     "output_type": "execute_result"
499
    }
500
   ],
501
   "source": [
502
    "temp_df.head()\n"
503
   ]
504
  },
505
  {
506
   "cell_type": "code",
507
   "execution_count": null,
508
   "metadata": {},
509
   "outputs": [],
510
   "source": []
511
  },
512
  {
513
   "cell_type": "code",
514
   "execution_count": null,
515
   "metadata": {},
516
   "outputs": [],
517
   "source": [
518
    "\n"
519
   ]
520
  },
521
  {
522
   "cell_type": "code",
523
   "execution_count": null,
524
   "metadata": {},
525
   "outputs": [],
526
   "source": []
527
  },
528
  {
529
   "cell_type": "code",
530
   "execution_count": null,
531
   "metadata": {},
532
   "outputs": [],
533
   "source": [
534
    "\n"
535
   ]
536
  },
537
  {
538
   "cell_type": "code",
539
   "execution_count": null,
540
   "metadata": {},
541
   "outputs": [],
542
   "source": []
543
  },
544
  {
545
   "cell_type": "code",
546
   "execution_count": null,
547
   "metadata": {},
548
   "outputs": [],
549
   "source": []
550
  },
551
  {
552
   "cell_type": "code",
553
   "execution_count": null,
554
   "metadata": {},
555
   "outputs": [],
556
   "source": []
557
  },
558
  {
559
   "cell_type": "code",
560
   "execution_count": null,
561
   "metadata": {},
562
   "outputs": [],
563
   "source": []
564
  },
565
  {
566
   "cell_type": "code",
567
   "execution_count": null,
568
   "metadata": {},
569
   "outputs": [],
570
   "source": []
571
  }
572
 ],
573
 "metadata": {
574
  "kernelspec": {
575
   "display_name": "Python 3",
576
   "language": "python",
577
   "name": "python3"
578
  },
579
  "language_info": {
580
   "codemirror_mode": {
581
    "name": "ipython",
582
    "version": 3
583
   },
584
   "file_extension": ".py",
585
   "mimetype": "text/x-python",
586
   "name": "python",
587
   "nbconvert_exporter": "python",
588
   "pygments_lexer": "ipython3",
589
   "version": "3.6.5"
590
  }
591
 },
592
 "nbformat": 4,
593
 "nbformat_minor": 2
594
}