# %%
import os
import sys
import argparse
import tensorflow as tf
from tensorflow import keras
import pandas as pd
from data_loader import read_trainset, DataGenerator
import parse_config

# %%
# GPU session setup.
# comment out if using tensorflow 2.x
# When the project config sets USING_RTX_20XX, install a TF1-style session
# with `allow_growth` enabled so GPU memory is claimed on demand rather than
# reserved up front (presumably a workaround for RTX 20xx cards -- confirm
# against parse_config).
if parse_config.USING_RTX_20XX:
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True
    tf.compat.v1.keras.backend.set_session(tf.compat.v1.Session(config=config))

# %%
# Inference configuration. Everything a reader might need to tune lives here.
MODEL_NAME = '../models/epoch3.hdf5'   # trained weights loaded for inference
img_size = (256, 256, 3)               # passed to DataGenerator; assumed (height, width, channels) -- TODO confirm
batch_size = 16                        # 121232 test images / 16 = 7577 full batches, no partial last batch

# The image directory is machine-specific, so allow an override through the
# ICH_TEST_IMAGES_DIR environment variable; the default is the original path.
test_images_dir = os.environ.get(
    'ICH_TEST_IMAGES_DIR',
    '/media/keil/baltar/intracranial-hemorrhage-detection-data/stage_2_test_images/',
)
testset_filename = "../submissions/stage_2_sample_submission.csv"
# %%
def read_testset(filename):
    """Read the sample-submission CSV and pivot it to one row per image.

    Args:
        filename (str): Path of the sample submission CSV. It needs an ``ID``
            column of the form ``<12-character image id>_<diagnosis>`` and a
            ``Label`` column.

    Returns:
        pandas.DataFrame: Frame indexed by ``Image`` whose columns are a
        two-level index ``("Label", <Diagnosis>)``, one column per diagnosis.
    """
    df = pd.read_csv(filename)
    # Characters 0-11 are the image id, character 12 is the "_" separator and
    # the rest is the diagnosis name.
    df["Image"] = df["ID"].str.slice(stop=12)
    df["Diagnosis"] = df["ID"].str.slice(start=13)

    df = df.loc[:, ["Label", "Diagnosis", "Image"]]
    # Long -> wide: one row per image, one column per diagnosis.
    df = df.set_index(['Image', 'Diagnosis']).unstack(level=-1)

    return df


def create_submission(model, data, test_df,
                      out_path='../submissions/stage2-final-submission-v2.csv'):
    """Predict on the test generator and write the submission CSV.

    Args:
        model: Object exposing ``predict_generator(data, verbose=...)`` or,
            failing that, ``predict(data, verbose=...)``.
        data: Batch generator handed to the model unchanged.
        test_df (pandas.DataFrame): Wide frame from ``read_testset``; only its
            index and columns are used, to label the predictions.
        out_path (str): Destination CSV. Defaults to the path that was
            previously hard-coded, so existing callers are unaffected.

    Returns:
        pandas.DataFrame: Long-format submission with ``ID`` and ``Label``
        columns, identical to what is written to ``out_path``.

    Raises:
        ValueError: If the predictions cannot be aligned with ``test_df``
            (for example a different number of rows).
    """
    print('+'*50)
    print("Creating predictions on test dataset")

    # `predict_generator` is deprecated in newer TensorFlow/Keras releases in
    # favour of `predict`; prefer the original call and fall back when absent.
    predict = getattr(model, "predict_generator", None)
    if predict is None:
        predict = model.predict
    pred = predict(data, verbose=1)

    try:
        out_df = pd.DataFrame(pred, index=test_df.index, columns=test_df.columns)
    except ValueError as err:
        # Same exception type pandas raises, with a hint about the usual cause.
        raise ValueError(
            "Predictions do not line up with test_df (expected shape {}). "
            "If the generator emits only full batches, pick a batch size that "
            "divides the number of test images.".format(test_df.shape)
        ) from err

    # Wide -> long, then rebuild the "<image>_<diagnosis>" ID column.
    submission = out_df.stack().reset_index()
    submission.insert(loc=0, column='ID',
                      value=submission['Image'].astype(str) + "_" + submission['Diagnosis'])
    submission = submission.drop(["Image", "Diagnosis"], axis=1)

    print("Saving submissions to {}".format(out_path))
    submission.to_csv(out_path, index=False)

    return submission


# %%
# Build the inference inputs. Guarded so the helpers above can be imported
# (e.g. from an exported .py module or a test) without loading the model;
# inside Jupyter `__name__` is "__main__", so this cell still runs as before.
if __name__ == "__main__":
    test_df = read_testset(testset_filename)
    test_generator = DataGenerator(list_IDs=test_df.index,
                                   batch_size=batch_size,
                                   img_size=img_size,
                                   img_dir=test_images_dir)
    # compile=False: load the saved model without compiling it.
    best_model = keras.models.load_model(MODEL_NAME, compile=False)
# %%
# test_df shape: (121232, 6) -- 121232 files in stage_2_test via
#   keil$ ls -1 stage_2_test_images/ | wc -l | less
# All three lengths must agree. The previous form,
#   len(test_df == len(test_generator.list_IDs)),
# compared the frame to an int element-wise and took the length of the
# resulting boolean frame, so list_IDs was never actually checked.
assert len(test_generator.indices) == len(test_df) == len(test_generator.list_IDs)

test_df.head()

# %% [markdown]
# What is going on: the stage-2 test image count (121232) is not evenly
# divisible by the batch size of 20. The last batch is 8 images short, and the
# predictions come back as 6062 full batches = 121240 rows, which is the size I
# was seeing. Confirming now with a batch size of 16, which divides 121232
# evenly (7577 batches)... will confirm again with batch size = 1.

# %%
# Step through the function line by line:
#
# create_submission(best_model, test_generator, test_df)
# def create_submission(model, data, test_df):

pred_batch16 = best_model.predict_generator(test_generator, verbose=1)

# %%
# One prediction row per test image is required before labelling the rows with
# test_df.index in the next cell.
assert pred_batch16.shape[0] == len(test_df)

pred_batch16.shape  # (121232, 6) with batch_size = 16 -- good to go
# %%
# After getting predictions, reshape them into the submission layout:
# label the (n_images, 6) prediction matrix with test_df's index and columns,
# stack the diagnosis columns into rows, and rebuild the "<image>_<diagnosis>" ID.
# The result goes into `submission_df` rather than overwriting `test_df`, so
# this cell can be re-run without first re-running the loader cell.
out_df = pd.DataFrame(pred_batch16, index=test_df.index, columns=test_df.columns)

submission_df = out_df.stack().reset_index()

submission_df.insert(loc=0, column='ID',
                     value=submission_df['Image'].astype(str) + "_" + submission_df['Diagnosis'])

submission_df = submission_df.drop(["Image", "Diagnosis"], axis=1)

submission_df.to_csv('../submissions/stage2-final-submission-v2.csv', index=False)

# %%
# Debug cells. They previously read `pred`, a variable left over from an
# earlier batch-size-20 run (shape (121240, 6)) that no cell in this notebook
# defines, so they failed on a fresh kernel. They now use `pred_batch16`.
pred_batch16.shape

# %%
# Dump the raw prediction matrix for inspection outside the notebook.
temp_df = pd.DataFrame(pred_batch16)
temp_df.to_csv('./temp_csv.csv')

# %%
temp_df.head()
[], + "source": [ + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}