Diff of /eda/inference.ipynb [000000] .. [fb2ce2]

Switch to side-by-side view

--- a
+++ b/eda/inference.ipynb
@@ -0,0 +1,594 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import sys\n",
+    "import argparse\n",
+    "import tensorflow as tf\n",
+    "from tensorflow import keras\n",
+    "import pandas as pd\n",
+    "from data_loader import read_trainset, DataGenerator\n",
+    "import parse_config"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# comment out if using tensorflow 2.x\n",
+    "if parse_config.USING_RTX_20XX:\n",
+    "    config = tf.compat.v1.ConfigProto()\n",
+    "    config.gpu_options.allow_growth = True\n",
+    "    tf.compat.v1.keras.backend.set_session(tf.compat.v1.Session(config=config))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 65,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "MODEL_NAME = '../models/epoch3.hdf5'\n",
+    "img_size = (256,256,3)\n",
+    "batch_size=16\n",
+    "\n",
+    "test_images_dir = '/media/keil/baltar/intracranial-hemorrhage-detection-data/stage_2_test_images/'\n",
+    "testset_filename = \"../submissions/stage_2_sample_submission.csv\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def read_testset(filename):\n",
+    "    \"\"\" Read the submission sample csv\n",
+    "        Args:\n",
+    "            filename (str): Filename of the sample submission \n",
+    "        Returns:\n",
+    "            df (panda dataframe):  Return a dataframe for inference.  \n",
+    "\n",
+    "     \"\"\"\n",
+    "    df = pd.read_csv(filename)\n",
+    "    df[\"Image\"] = df[\"ID\"].str.slice(stop=12)\n",
+    "    df[\"Diagnosis\"] = df[\"ID\"].str.slice(start=13)\n",
+    "\n",
+    "    df = df.loc[:, [\"Label\", \"Diagnosis\", \"Image\"]]\n",
+    "    df = df.set_index(['Image', 'Diagnosis']).unstack(level=-1)\n",
+    "\n",
+    "    return df\n",
+    "\n",
+    "def create_submission(model, data, test_df):\n",
+    "\n",
+    "    print('+'*50)\n",
+    "    print(\"Creating predictions on test dataset\")\n",
+    "    pred = model.predict_generator(data, verbose=1)\n",
+    "    out_df = pd.DataFrame(pred, index=test_df.index, columns=test_df.columns)\n",
+    "    test_df = out_df.stack().reset_index()\n",
+    "    test_df.insert(loc=0, column='ID', value=test_df['Image'].astype(str) + \"_\" + test_df['Diagnosis'])\n",
+    "    test_df = test_df.drop([\"Image\", \"Diagnosis\"], axis=1)\n",
+    "    print(\"Saving submissions to submission.csv\")\n",
+    "    test_df.to_csv('../submissions/stage2-final-submission-v2.csv', index=False)\n",
+    "\n",
+    "    return test_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 66,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "test_df = read_testset(testset_filename)\n",
+    "test_generator = DataGenerator(list_IDs = test_df.index, \n",
+    "                                batch_size = batch_size,\n",
+    "                                img_size = img_size,\n",
+    "                                img_dir = test_images_dir)\n",
+    "best_model = keras.models.load_model(MODEL_NAME, compile=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 67,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead tr th {\n",
+       "        text-align: left;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead tr:last-of-type th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr>\n",
+       "      <th></th>\n",
+       "      <th colspan=\"6\" halign=\"left\">Label</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Diagnosis</th>\n",
+       "      <th>any</th>\n",
+       "      <th>epidural</th>\n",
+       "      <th>intraparenchymal</th>\n",
+       "      <th>intraventricular</th>\n",
+       "      <th>subarachnoid</th>\n",
+       "      <th>subdural</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Image</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>ID_000000e27</th>\n",
+       "      <td>0.5</td>\n",
+       "      <td>0.5</td>\n",
+       "      <td>0.5</td>\n",
+       "      <td>0.5</td>\n",
+       "      <td>0.5</td>\n",
+       "      <td>0.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>ID_000009146</th>\n",
+       "      <td>0.5</td>\n",
+       "      <td>0.5</td>\n",
+       "      <td>0.5</td>\n",
+       "      <td>0.5</td>\n",
+       "      <td>0.5</td>\n",
+       "      <td>0.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>ID_00007b8cb</th>\n",
+       "      <td>0.5</td>\n",
+       "      <td>0.5</td>\n",
+       "      <td>0.5</td>\n",
+       "      <td>0.5</td>\n",
+       "      <td>0.5</td>\n",
+       "      <td>0.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>ID_000134952</th>\n",
+       "      <td>0.5</td>\n",
+       "      <td>0.5</td>\n",
+       "      <td>0.5</td>\n",
+       "      <td>0.5</td>\n",
+       "      <td>0.5</td>\n",
+       "      <td>0.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>ID_000176f2a</th>\n",
+       "      <td>0.5</td>\n",
+       "      <td>0.5</td>\n",
+       "      <td>0.5</td>\n",
+       "      <td>0.5</td>\n",
+       "      <td>0.5</td>\n",
+       "      <td>0.5</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "             Label                                                          \\\n",
+       "Diagnosis      any epidural intraparenchymal intraventricular subarachnoid   \n",
+       "Image                                                                        \n",
+       "ID_000000e27   0.5      0.5              0.5              0.5          0.5   \n",
+       "ID_000009146   0.5      0.5              0.5              0.5          0.5   \n",
+       "ID_00007b8cb   0.5      0.5              0.5              0.5          0.5   \n",
+       "ID_000134952   0.5      0.5              0.5              0.5          0.5   \n",
+       "ID_000176f2a   0.5      0.5              0.5              0.5          0.5   \n",
+       "\n",
+       "                       \n",
+       "Diagnosis    subdural  \n",
+       "Image                  \n",
+       "ID_000000e27      0.5  \n",
+       "ID_000009146      0.5  \n",
+       "ID_00007b8cb      0.5  \n",
+       "ID_000134952      0.5  \n",
+       "ID_000176f2a      0.5  "
+      ]
+     },
+     "execution_count": 67,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "#test_df shape: (121232, 6) -- 121232 files in stage_2_test via keil$ ls -1 stage_2_test_images/ | wc -l | less\n",
+    "# Chained comparison: generator indices, generator IDs and test_df rows must all\n",
+    "# agree. (A misplaced parenthesis used to compare test_df element-wise against\n",
+    "# an int, so list_IDs was never actually checked.)\n",
+    "assert len(test_generator.indices) == len(test_generator.list_IDs) == len(test_df), (\n",
+    "    'generator/test_df size mismatch: {} indices, {} list_IDs, {} rows'.format(\n",
+    "        len(test_generator.indices), len(test_generator.list_IDs), len(test_df))\n",
+    ")\n",
+    "\n",
+    "\n",
+    "test_df.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "What is going on: the stage-2 test image count (121232) is not evenly divisible by the batch size of 20. 121232 / 20 leaves a final partial batch of 12 images, i.e. 8 short of a full batch, and 6062 full batches of 20 give the 121240 prediction rows I was seeing. Confirming now with a batch size of 16, which divides 121232 evenly (7577 batches)... will confirm again via batch size = 1."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 68,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "7577/7577 [==============================] - 6483s 856ms/step\n"
+     ]
+    }
+   ],
+   "source": [
+    "# step through the functon line by line:\n",
+    "\n",
+    "# create_submission(best_model, test_generator, test_df)\n",
+    "# def create_submission(model, data, test_df):\n",
+    "\n",
+    "pred_batch16 = best_model.predict_generator(test_generator, verbose=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 69,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(121232, 6)"
+      ]
+     },
+     "execution_count": 69,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pred_batch16.shape #good to go.... :D ffs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 71,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# After getting predictions here is some pandas gymnastics...\n",
+    "# The result is kept in `submission_df` rather than overwriting `test_df`, so this\n",
+    "# cell can be re-run: `test_df` stays the wide frame whose index/columns label\n",
+    "# the prediction matrix.\n",
+    "assert pred_batch16.shape == test_df.shape, 'predictions do not line up with test_df'\n",
+    "out_df = pd.DataFrame(pred_batch16, index=test_df.index, columns=test_df.columns)\n",
+    "\n",
+    "# Wide -> long: one row per (Image, Diagnosis) pair.\n",
+    "submission_df = out_df.stack().reset_index()\n",
+    "\n",
+    "# Rebuild the '<Image>_<Diagnosis>' ID and drop the two helper columns.\n",
+    "submission_df.insert(loc=0, column='ID', value=submission_df['Image'].astype(str) + \"_\" + submission_df['Diagnosis'])\n",
+    "submission_df = submission_df.drop([\"Image\", \"Diagnosis\"], axis=1)\n",
+    "\n",
+    "submission_df.to_csv('../submissions/stage2-final-submission-v2.csv', index=False)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 70,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(121240, 6)"
+      ]
+     },
+     "execution_count": 70,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pred.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 59,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "temp_df = pd.DataFrame(pred)\n",
+    "temp_df.to_csv('./temp_csv.csv')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 58,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>0</th>\n",
+       "      <th>1</th>\n",
+       "      <th>2</th>\n",
+       "      <th>3</th>\n",
+       "      <th>4</th>\n",
+       "      <th>5</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0.117452</td>\n",
+       "      <td>0.000942</td>\n",
+       "      <td>0.067592</td>\n",
+       "      <td>0.000453</td>\n",
+       "      <td>0.052313</td>\n",
+       "      <td>0.011529</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>0.001256</td>\n",
+       "      <td>0.000010</td>\n",
+       "      <td>0.000121</td>\n",
+       "      <td>0.000128</td>\n",
+       "      <td>0.000440</td>\n",
+       "      <td>0.000986</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>0.002467</td>\n",
+       "      <td>0.000215</td>\n",
+       "      <td>0.003454</td>\n",
+       "      <td>0.000158</td>\n",
+       "      <td>0.000787</td>\n",
+       "      <td>0.001039</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>0.002803</td>\n",
+       "      <td>0.000091</td>\n",
+       "      <td>0.000339</td>\n",
+       "      <td>0.000042</td>\n",
+       "      <td>0.001047</td>\n",
+       "      <td>0.001354</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>0.002144</td>\n",
+       "      <td>0.000046</td>\n",
+       "      <td>0.000286</td>\n",
+       "      <td>0.000154</td>\n",
+       "      <td>0.000292</td>\n",
+       "      <td>0.002259</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "          0         1         2         3         4         5\n",
+       "0  0.117452  0.000942  0.067592  0.000453  0.052313  0.011529\n",
+       "1  0.001256  0.000010  0.000121  0.000128  0.000440  0.000986\n",
+       "2  0.002467  0.000215  0.003454  0.000158  0.000787  0.001039\n",
+       "3  0.002803  0.000091  0.000339  0.000042  0.001047  0.001354\n",
+       "4  0.002144  0.000046  0.000286  0.000154  0.000292  0.002259"
+      ]
+     },
+     "execution_count": 58,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "temp_df.head()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}