[fb2ce2]: / eda / inference.ipynb

Download this file

595 lines (594 with data), 15.6 kB

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "import argparse\n",
    "import tensorflow as tf\n",
    "from tensorflow import keras\n",
    "import pandas as pd\n",
    "from data_loader import read_trainset, DataGenerator\n",
    "import parse_config"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# comment out if using tensorflow 2.x\n",
    "if parse_config.USING_RTX_20XX:\n",
    "    config = tf.compat.v1.ConfigProto()\n",
    "    config.gpu_options.allow_growth = True\n",
    "    tf.compat.v1.keras.backend.set_session(tf.compat.v1.Session(config=config))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [],
   "source": [
    "MODEL_NAME = '../models/epoch3.hdf5'\n",
    "img_size = (256,256,3)\n",
    "batch_size=16\n",
    "\n",
    "test_images_dir = '/media/keil/baltar/intracranial-hemorrhage-detection-data/stage_2_test_images/'\n",
    "testset_filename = \"../submissions/stage_2_sample_submission.csv\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_testset(filename):\n",
    "    \"\"\" Read the submission sample csv\n",
    "        Args:\n",
    "            filename (str): Filename of the sample submission \n",
    "        Returns:\n",
    "            df (panda dataframe):  Return a dataframe for inference.  \n",
    "\n",
    "     \"\"\"\n",
    "    df = pd.read_csv(filename)\n",
    "    df[\"Image\"] = df[\"ID\"].str.slice(stop=12)\n",
    "    df[\"Diagnosis\"] = df[\"ID\"].str.slice(start=13)\n",
    "\n",
    "    df = df.loc[:, [\"Label\", \"Diagnosis\", \"Image\"]]\n",
    "    df = df.set_index(['Image', 'Diagnosis']).unstack(level=-1)\n",
    "\n",
    "    return df\n",
    "\n",
    "def create_submission(model, data, test_df):\n",
    "\n",
    "    print('+'*50)\n",
    "    print(\"Creating predictions on test dataset\")\n",
    "    pred = model.predict_generator(data, verbose=1)\n",
    "    out_df = pd.DataFrame(pred, index=test_df.index, columns=test_df.columns)\n",
    "    test_df = out_df.stack().reset_index()\n",
    "    test_df.insert(loc=0, column='ID', value=test_df['Image'].astype(str) + \"_\" + test_df['Diagnosis'])\n",
    "    test_df = test_df.drop([\"Image\", \"Diagnosis\"], axis=1)\n",
    "    print(\"Saving submissions to submission.csv\")\n",
    "    test_df.to_csv('../submissions/stage2-final-submission-v2.csv', index=False)\n",
    "\n",
    "    return test_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_df = read_testset(testset_filename)\n",
    "test_generator = DataGenerator(list_IDs = test_df.index, \n",
    "                                batch_size = batch_size,\n",
    "                                img_size = img_size,\n",
    "                                img_dir = test_images_dir)\n",
    "best_model = keras.models.load_model(MODEL_NAME, compile=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"6\" halign=\"left\">Label</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Diagnosis</th>\n",
       "      <th>any</th>\n",
       "      <th>epidural</th>\n",
       "      <th>intraparenchymal</th>\n",
       "      <th>intraventricular</th>\n",
       "      <th>subarachnoid</th>\n",
       "      <th>subdural</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Image</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>ID_000000e27</th>\n",
       "      <td>0.5</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID_000009146</th>\n",
       "      <td>0.5</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID_00007b8cb</th>\n",
       "      <td>0.5</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID_000134952</th>\n",
       "      <td>0.5</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID_000176f2a</th>\n",
       "      <td>0.5</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             Label                                                          \\\n",
       "Diagnosis      any epidural intraparenchymal intraventricular subarachnoid   \n",
       "Image                                                                        \n",
       "ID_000000e27   0.5      0.5              0.5              0.5          0.5   \n",
       "ID_000009146   0.5      0.5              0.5              0.5          0.5   \n",
       "ID_00007b8cb   0.5      0.5              0.5              0.5          0.5   \n",
       "ID_000134952   0.5      0.5              0.5              0.5          0.5   \n",
       "ID_000176f2a   0.5      0.5              0.5              0.5          0.5   \n",
       "\n",
       "                       \n",
       "Diagnosis    subdural  \n",
       "Image                  \n",
       "ID_000000e27      0.5  \n",
       "ID_000009146      0.5  \n",
       "ID_00007b8cb      0.5  \n",
       "ID_000134952      0.5  \n",
       "ID_000176f2a      0.5  "
      ]
     },
     "execution_count": 67,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#test_df shape: (121232, 6) -- 121232 files in stage_2_test via keil$ ls -1 stage_2_test_images/ | wc -l | less\n",
    "assert len(test_generator.indices) == len(test_df == len(test_generator.list_IDs)) #checks out\n",
    "\n",
    "\n",
    "test_df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "What is going on is the batch size is not evenly divisable by the img count in the test2_stage of 121232/batch of 20 = remainder of 8 images thus the size of 121240 which I was seeing. Confirming now by using a batchsize of 16 which is evenly divisible... will confirm again via batch size = 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "7577/7577 [==============================] - 6483s 856ms/step\n"
     ]
    }
   ],
   "source": [
    "# step through the functon line by line:\n",
    "\n",
    "# create_submission(best_model, test_generator, test_df)\n",
    "# def create_submission(model, data, test_df):\n",
    "\n",
    "pred_batch16 = best_model.predict_generator(test_generator, verbose=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(121232, 6)"
      ]
     },
     "execution_count": 69,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pred_batch16.shape #good to go.... :D ffs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [],
   "source": [
    "# After getting predictions here is some pandas gymnastics...\n",
    "out_df = pd.DataFrame(pred_batch16, index=test_df.index, columns=test_df.columns)\n",
    "\n",
    "\n",
    "test_df = out_df.stack().reset_index()\n",
    "\n",
    "\n",
    "test_df.insert(loc=0, column='ID', value=test_df['Image'].astype(str) + \"_\" + test_df['Diagnosis'])\n",
    "\n",
    "\n",
    "test_df = test_df.drop([\"Image\", \"Diagnosis\"], axis=1)\n",
    "\n",
    "\n",
    "test_df.to_csv('../submissions/stage2-final-submission-v2.csv', index=False)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(121240, 6)"
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pred.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [],
   "source": [
    "temp_df = pd.DataFrame(pred)\n",
    "temp_df.to_csv('./temp_csv.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.117452</td>\n",
       "      <td>0.000942</td>\n",
       "      <td>0.067592</td>\n",
       "      <td>0.000453</td>\n",
       "      <td>0.052313</td>\n",
       "      <td>0.011529</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.001256</td>\n",
       "      <td>0.000010</td>\n",
       "      <td>0.000121</td>\n",
       "      <td>0.000128</td>\n",
       "      <td>0.000440</td>\n",
       "      <td>0.000986</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.002467</td>\n",
       "      <td>0.000215</td>\n",
       "      <td>0.003454</td>\n",
       "      <td>0.000158</td>\n",
       "      <td>0.000787</td>\n",
       "      <td>0.001039</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.002803</td>\n",
       "      <td>0.000091</td>\n",
       "      <td>0.000339</td>\n",
       "      <td>0.000042</td>\n",
       "      <td>0.001047</td>\n",
       "      <td>0.001354</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.002144</td>\n",
       "      <td>0.000046</td>\n",
       "      <td>0.000286</td>\n",
       "      <td>0.000154</td>\n",
       "      <td>0.000292</td>\n",
       "      <td>0.002259</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          0         1         2         3         4         5\n",
       "0  0.117452  0.000942  0.067592  0.000453  0.052313  0.011529\n",
       "1  0.001256  0.000010  0.000121  0.000128  0.000440  0.000986\n",
       "2  0.002467  0.000215  0.003454  0.000158  0.000787  0.001039\n",
       "3  0.002803  0.000091  0.000339  0.000042  0.001047  0.001354\n",
       "4  0.002144  0.000046  0.000286  0.000154  0.000292  0.002259"
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "temp_df.head()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}