595 lines (594 with data), 15.6 kB
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import sys\n",
"import argparse\n",
"import tensorflow as tf\n",
"from tensorflow import keras\n",
"import pandas as pd\n",
"from data_loader import read_trainset, DataGenerator\n",
"import parse_config"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# GPU session setup, applied only when parse_config.USING_RTX_20XX is set.\n",
"# Original note: comment this cell out if using tensorflow 2.x.\n",
"if parse_config.USING_RTX_20XX:\n",
"    # allow_growth=True is set on the session's GPU options before the session is created.\n",
"    config = tf.compat.v1.ConfigProto(\n",
"        gpu_options=tf.compat.v1.GPUOptions(allow_growth=True)\n",
"    )\n",
"    # Register the configured session as the one the Keras backend uses.\n",
"    tf.compat.v1.keras.backend.set_session(tf.compat.v1.Session(config=config))"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {},
"outputs": [],
"source": [
"# --- Inference configuration ---\n",
"# Checkpoint loaded for prediction.\n",
"MODEL_NAME = '../models/epoch3.hdf5'\n",
"# Image size handed to the data generator.\n",
"img_size = (256, 256, 3)\n",
"# 16 divides the 121232 stage 2 test images evenly (7577 batches).\n",
"batch_size = 16\n",
"\n",
"# The default below is a machine-specific absolute path; set the\n",
"# RSNA_TEST_IMAGES_DIR environment variable to point somewhere else\n",
"# without editing the notebook.\n",
"test_images_dir = os.environ.get(\n",
"    'RSNA_TEST_IMAGES_DIR',\n",
"    '/media/keil/baltar/intracranial-hemorrhage-detection-data/stage_2_test_images/'\n",
")\n",
"# Sample submission CSV; it defines which image IDs are predicted.\n",
"testset_filename = \"../submissions/stage_2_sample_submission.csv\""
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"def read_testset(filename):\n",
"    \"\"\"Read the sample-submission CSV and pivot it to one row per image.\n",
"\n",
"    Each ``ID`` is split positionally: the first 12 characters become\n",
"    ``Image`` and everything from character 13 onward becomes ``Diagnosis``\n",
"    (the character in between is dropped).\n",
"\n",
"    Args:\n",
"        filename (str): Path of the sample submission CSV. It must contain\n",
"            ``ID`` and ``Label`` columns.\n",
"\n",
"    Returns:\n",
"        pandas.DataFrame: Frame indexed by ``Image`` whose columns are the\n",
"        two-level index ``('Label', <Diagnosis>)``.\n",
"    \"\"\"\n",
"    df = pd.read_csv(filename)\n",
"    df[\"Image\"] = df[\"ID\"].str.slice(stop=12)\n",
"    df[\"Diagnosis\"] = df[\"ID\"].str.slice(start=13)\n",
"\n",
"    df = df.loc[:, [\"Label\", \"Diagnosis\", \"Image\"]]\n",
"    # Pivot the long (Image, Diagnosis) rows into one column per diagnosis.\n",
"    df = df.set_index(['Image', 'Diagnosis']).unstack(level=-1)\n",
"\n",
"    return df\n",
"\n",
"def create_submission(model, data, test_df, output_path='../submissions/stage2-final-submission-v2.csv'):\n",
"    \"\"\"Predict on the test generator and write the long-format submission CSV.\n",
"\n",
"    Args:\n",
"        model: Object exposing ``predict_generator(data, verbose=1)``.\n",
"        data: Test data generator handed to ``model.predict_generator``.\n",
"        test_df (pandas.DataFrame): Pivoted frame from ``read_testset``; its\n",
"            index and columns label the prediction matrix.\n",
"        output_path (str): Where the CSV is written. Defaults to the path\n",
"            this notebook has always used.\n",
"\n",
"    Returns:\n",
"        pandas.DataFrame: Long-format frame with ``ID`` and ``Label`` columns.\n",
"    \"\"\"\n",
"    print('+'*50)\n",
"    print(\"Creating predictions on test dataset\")\n",
"    pred = model.predict_generator(data, verbose=1)\n",
"\n",
"    # The generator can emit a full final batch even when the image count is\n",
"    # not a multiple of the batch size (121240 rows for 121232 images at batch\n",
"    # size 20). Keep only the leading rows that line up with test_df.\n",
"    # NOTE(review): assumes the surplus rows sit at the end and the generator\n",
"    # does not shuffle -- confirm against DataGenerator.\n",
"    pred = pred[:len(test_df)]\n",
"\n",
"    out_df = pd.DataFrame(pred, index=test_df.index, columns=test_df.columns)\n",
"    # Back to one row per (Image, Diagnosis) pair.\n",
"    submission = out_df.stack().reset_index()\n",
"    submission.insert(loc=0, column='ID', value=submission['Image'].astype(str) + \"_\" + submission['Diagnosis'])\n",
"    submission = submission.drop([\"Image\", \"Diagnosis\"], axis=1)\n",
"    print(\"Saving submissions to {}\".format(output_path))\n",
"    submission.to_csv(output_path, index=False)\n",
"\n",
"    return submission"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [],
"source": [
"# Pivoted sample submission: one row per test image (see read_testset).\n",
"test_df = read_testset(testset_filename)\n",
"\n",
"# Batch generator over the test images, keyed by the image IDs in test_df's index.\n",
"test_generator = DataGenerator(\n",
"    list_IDs=test_df.index,\n",
"    batch_size=batch_size,\n",
"    img_size=img_size,\n",
"    img_dir=test_images_dir,\n",
")\n",
"\n",
"# Loaded with compile=False; the model is only used for prediction in this notebook.\n",
"best_model = keras.models.load_model(MODEL_NAME, compile=False)"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead tr th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe thead tr:last-of-type th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th colspan=\"6\" halign=\"left\">Label</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Diagnosis</th>\n",
" <th>any</th>\n",
" <th>epidural</th>\n",
" <th>intraparenchymal</th>\n",
" <th>intraventricular</th>\n",
" <th>subarachnoid</th>\n",
" <th>subdural</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Image</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>ID_000000e27</th>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ID_000009146</th>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ID_00007b8cb</th>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ID_000134952</th>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ID_000176f2a</th>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Label \\\n",
"Diagnosis any epidural intraparenchymal intraventricular subarachnoid \n",
"Image \n",
"ID_000000e27 0.5 0.5 0.5 0.5 0.5 \n",
"ID_000009146 0.5 0.5 0.5 0.5 0.5 \n",
"ID_00007b8cb 0.5 0.5 0.5 0.5 0.5 \n",
"ID_000134952 0.5 0.5 0.5 0.5 0.5 \n",
"ID_000176f2a 0.5 0.5 0.5 0.5 0.5 \n",
"\n",
" \n",
"Diagnosis subdural \n",
"Image \n",
"ID_000000e27 0.5 \n",
"ID_000009146 0.5 \n",
"ID_00007b8cb 0.5 \n",
"ID_000134952 0.5 \n",
"ID_000176f2a 0.5 "
]
},
"execution_count": 67,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# test_df shape: (121232, 6) -- 121232 files in stage_2_test (counted with `ls -1 stage_2_test_images/ | wc -l`)\n",
"# The generator must cover exactly the images listed in test_df. The previous form,\n",
"# len(test_df == len(...)), compared the frame element-wise and so never checked list_IDs.\n",
"assert len(test_generator.indices) == len(test_df) == len(test_generator.list_IDs), (\n",
"    \"test_generator and test_df disagree on the number of test images\"\n",
")\n",
"\n",
"test_df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"What is going on: the stage 2 test set has 121232 images, which is not evenly divisible by a batch size of 20 (6061 full batches of 20 plus 12 images left over). The final batch therefore holds only 12 real images, yet the generator still emits a full batch of 20, which adds 8 extra rows and explains the 121240 predictions I was seeing. Confirming now by using a batch size of 16, which divides 121232 evenly (7577 batches)... will confirm again via batch size = 1."
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"7577/7577 [==============================] - 6483s 856ms/step\n"
]
}
],
"source": [
"# Step through create_submission() by hand, starting with the prediction itself.\n",
"# This is the slow part: the stored run took 6483s for 7577 batches.\n",
"pred_batch16 = best_model.predict_generator(test_generator, verbose=1)\n",
"\n",
"# Fail right here, not later in the pandas reshaping, if the generator produced\n",
"# a different number of rows than there are test images.\n",
"assert pred_batch16.shape[0] == len(test_df), (\n",
"    \"expected {} prediction rows, got {}\".format(len(test_df), pred_batch16.shape[0])\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(121232, 6)"
]
},
"execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Expect (121232, 6): one row per test image, one column per diagnosis.\n",
"pred_batch16.shape"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {},
"outputs": [],
"source": [
"# Reshape the prediction matrix into the long ID/Label submission format.\n",
"# Results go into new names (not back into `test_df`), so `test_df` keeps its\n",
"# pivoted form and this cell can be re-run safely.\n",
"out_df = pd.DataFrame(pred_batch16, index=test_df.index, columns=test_df.columns)\n",
"\n",
"# One row per (Image, Diagnosis) pair.\n",
"submission_df = out_df.stack().reset_index()\n",
"\n",
"# Rebuild the submission ID as <Image>_<Diagnosis>, then drop the helper columns.\n",
"submission_df.insert(loc=0, column='ID', value=submission_df['Image'].astype(str) + \"_\" + submission_df['Diagnosis'])\n",
"submission_df = submission_df.drop([\"Image\", \"Diagnosis\"], axis=1)\n",
"\n",
"submission_df.to_csv('../submissions/stage2-final-submission-v2.csv', index=False)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(121240, 6)"
]
},
"execution_count": 70,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# NOTE(review): `pred` is not assigned anywhere in this notebook -- presumably it is left over\n",
"# from the earlier batch_size=20 run (hence the stored 121240 rows). TODO: confirm or remove.\n",
"pred.shape"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [],
"source": [
"# Dump the raw prediction matrix to a CSV for inspection.\n",
"# NOTE(review): `pred` is not assigned anywhere in this notebook -- presumably left over\n",
"# from an earlier run. TODO: confirm.\n",
"temp_df = pd.DataFrame(data=pred)\n",
"temp_df.to_csv(path_or_buf='./temp_csv.csv')"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" <th>5</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.117452</td>\n",
" <td>0.000942</td>\n",
" <td>0.067592</td>\n",
" <td>0.000453</td>\n",
" <td>0.052313</td>\n",
" <td>0.011529</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.001256</td>\n",
" <td>0.000010</td>\n",
" <td>0.000121</td>\n",
" <td>0.000128</td>\n",
" <td>0.000440</td>\n",
" <td>0.000986</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.002467</td>\n",
" <td>0.000215</td>\n",
" <td>0.003454</td>\n",
" <td>0.000158</td>\n",
" <td>0.000787</td>\n",
" <td>0.001039</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.002803</td>\n",
" <td>0.000091</td>\n",
" <td>0.000339</td>\n",
" <td>0.000042</td>\n",
" <td>0.001047</td>\n",
" <td>0.001354</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0.002144</td>\n",
" <td>0.000046</td>\n",
" <td>0.000286</td>\n",
" <td>0.000154</td>\n",
" <td>0.000292</td>\n",
" <td>0.002259</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 0 1 2 3 4 5\n",
"0 0.117452 0.000942 0.067592 0.000453 0.052313 0.011529\n",
"1 0.001256 0.000010 0.000121 0.000128 0.000440 0.000986\n",
"2 0.002467 0.000215 0.003454 0.000158 0.000787 0.001039\n",
"3 0.002803 0.000091 0.000339 0.000042 0.001047 0.001354\n",
"4 0.002144 0.000046 0.000286 0.000154 0.000292 0.002259"
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Peek at the first raw prediction rows; the columns are the unlabeled positions 0-5.\n",
"temp_df.head()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}