1273 lines (1272 with data), 47.3 kB
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Using TensorFlow backend.\n"
]
}
],
"source": [
"import os\n",
"import pickle\n",
"import random\n",
"import glob\n",
"from glob import glob\n",
"import datetime\n",
"import pandas as pd\n",
"import numpy as np\n",
"import cv2\n",
"import pydicom\n",
"from tqdm import tqdm\n",
"from joblib import delayed, Parallel\n",
"import zipfile\n",
"from pydicom.filebase import DicomBytesIO\n",
"import sys\n",
"from PIL import Image\n",
"import cv2\n",
"import pickle\n",
"\n",
"\n",
"import click\n",
"\n",
"\n",
"\n",
"from joblib import delayed, Parallel\n",
"import random\n",
"\n",
"\n",
"from scipy import ndimage\n",
"import pydicom\n",
"from skimage import exposure\n",
"\n",
"base_url = '/home/ubuntu/kaggle/rsna-intracranial-hemorrhage-detection/'\n",
"TRAIN_DIR = '/home/ubuntu/kaggle/rsna-intracranial-hemorrhage-detection/stage_2_train/'\n",
"TEST_DIR = '/home/ubuntu/kaggle/rsna-intracranial-hemorrhage-detection/stage_2_test/'\n",
"os.listdir(base_url)\n",
"\n",
"import keras\n",
"from keras.models import model_from_json\n",
"import tensorflow as tf\n",
"from keras.models import Sequential, Model\n",
"from keras.layers import Dense, Conv2D, Flatten, MaxPooling2D, GlobalAveragePooling2D\n",
"from keras.applications.inception_v3 import InceptionV3\n",
"\n",
"# importing pyplot and image from matplotlib \n",
"import matplotlib.pyplot as plt \n",
"import matplotlib.image as mpimg \n",
"\n",
"\n",
"from keras.preprocessing import image\n",
"import albumentations as A\n",
"\n",
"\n",
"\n",
"from sklearn.model_selection import train_test_split"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"base_url = '/home/ubuntu/kaggle/rsna-intracranial-hemorrhage-detection/'\n",
"train_url = '/home/ubuntu/kaggle/rsna-intracranial-hemorrhage-detection/png/train/adjacent-brain-cropped/'\n",
"dcm_url = '/home/ubuntu/kaggle/rsna-intracranial-hemorrhage-detection/stage_2_train/'"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Diagnosis</th>\n",
" <th>any</th>\n",
" <th>epidural</th>\n",
" <th>intraparenchymal</th>\n",
" <th>intraventricular</th>\n",
" <th>subarachnoid</th>\n",
" <th>subdural</th>\n",
" </tr>\n",
" <tr>\n",
" <th>ImageID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>ID_000012eaf</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ID_000039fa0</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ID_00005679d</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ID_00008ce3c</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ID_0000950d7</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Diagnosis any epidural intraparenchymal intraventricular subarachnoid \\\n",
"ImageID \n",
"ID_000012eaf 0 0 0 0 0 \n",
"ID_000039fa0 0 0 0 0 0 \n",
"ID_00005679d 0 0 0 0 0 \n",
"ID_00008ce3c 0 0 0 0 0 \n",
"ID_0000950d7 0 0 0 0 0 \n",
"\n",
"Diagnosis subdural \n",
"ImageID \n",
"ID_000012eaf 0 \n",
"ID_000039fa0 0 \n",
"ID_00005679d 0 \n",
"ID_00008ce3c 0 \n",
"ID_0000950d7 0 "
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_df = pd.read_csv(f'{base_url}/stage_2_train.csv').drop_duplicates()\n",
"train_df['ImageID'] = train_df['ID'].str.slice(stop=12)\n",
"train_df['Diagnosis'] = train_df['ID'].str.slice(start=13)\n",
"train_labels = train_df.pivot(index=\"ImageID\", columns=\"Diagnosis\", values=\"Label\")\n",
"train_labels.head()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"train_metadata = pd.read_parquet(f'{base_url}/train_metadata.parquet.gzip')\n",
"test_metadata = pd.read_parquet(f'{base_url}/test_metadata.parquet.gzip')\n",
"\n",
"train_metadata[\"Dataset\"] = \"train\"\n",
"test_metadata[\"Dataset\"] = \"test\"\n",
"\n",
"train_metadata = train_metadata.join(train_labels)\n",
"\n",
"metadata = pd.concat([train_metadata, test_metadata], sort=True)\n",
"metadata.sort_values(by=\"ImagePositionPatient_2\", inplace=True, ascending=False)\n",
"metadata.sort_values(['PatientID','ImagePositionPatient_2'],inplace=True)\n",
"metadata.drop(['ID_6431af929'],inplace = True)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>BitsAllocated</th>\n",
" <th>BitsStored</th>\n",
" <th>Columns</th>\n",
" <th>Dataset</th>\n",
" <th>HighBit</th>\n",
" <th>ImageOrientationPatient_0</th>\n",
" <th>ImageOrientationPatient_1</th>\n",
" <th>ImageOrientationPatient_2</th>\n",
" <th>ImageOrientationPatient_3</th>\n",
" <th>ImageOrientationPatient_4</th>\n",
" <th>...</th>\n",
" <th>StudyID</th>\n",
" <th>StudyInstanceUID</th>\n",
" <th>WindowCenter</th>\n",
" <th>WindowWidth</th>\n",
" <th>any</th>\n",
" <th>epidural</th>\n",
" <th>intraparenchymal</th>\n",
" <th>intraventricular</th>\n",
" <th>subarachnoid</th>\n",
" <th>subdural</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Image</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>ID_45785016b</th>\n",
" <td>16</td>\n",
" <td>16</td>\n",
" <td>512</td>\n",
" <td>train</td>\n",
" <td>15</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.993572</td>\n",
" <td>...</td>\n",
" <td></td>\n",
" <td>ID_66929e09d4</td>\n",
" <td>30.0</td>\n",
" <td>80.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ID_37f32aed2</th>\n",
" <td>16</td>\n",
" <td>16</td>\n",
" <td>512</td>\n",
" <td>train</td>\n",
" <td>15</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.993572</td>\n",
" <td>...</td>\n",
" <td></td>\n",
" <td>ID_66929e09d4</td>\n",
" <td>30.0</td>\n",
" <td>80.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ID_1b9de2922</th>\n",
" <td>16</td>\n",
" <td>16</td>\n",
" <td>512</td>\n",
" <td>train</td>\n",
" <td>15</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.993572</td>\n",
" <td>...</td>\n",
" <td></td>\n",
" <td>ID_66929e09d4</td>\n",
" <td>30.0</td>\n",
" <td>80.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ID_d61a6a7b9</th>\n",
" <td>16</td>\n",
" <td>16</td>\n",
" <td>512</td>\n",
" <td>train</td>\n",
" <td>15</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.993572</td>\n",
" <td>...</td>\n",
" <td></td>\n",
" <td>ID_66929e09d4</td>\n",
" <td>30.0</td>\n",
" <td>80.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ID_406c82112</th>\n",
" <td>16</td>\n",
" <td>16</td>\n",
" <td>512</td>\n",
" <td>train</td>\n",
" <td>15</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.993572</td>\n",
" <td>...</td>\n",
" <td></td>\n",
" <td>ID_66929e09d4</td>\n",
" <td>30.0</td>\n",
" <td>80.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 36 columns</p>\n",
"</div>"
],
"text/plain": [
" BitsAllocated BitsStored Columns Dataset HighBit \\\n",
"Image \n",
"ID_45785016b 16 16 512 train 15 \n",
"ID_37f32aed2 16 16 512 train 15 \n",
"ID_1b9de2922 16 16 512 train 15 \n",
"ID_d61a6a7b9 16 16 512 train 15 \n",
"ID_406c82112 16 16 512 train 15 \n",
"\n",
" ImageOrientationPatient_0 ImageOrientationPatient_1 \\\n",
"Image \n",
"ID_45785016b 1.0 0.0 \n",
"ID_37f32aed2 1.0 0.0 \n",
"ID_1b9de2922 1.0 0.0 \n",
"ID_d61a6a7b9 1.0 0.0 \n",
"ID_406c82112 1.0 0.0 \n",
"\n",
" ImageOrientationPatient_2 ImageOrientationPatient_3 \\\n",
"Image \n",
"ID_45785016b 0.0 0.0 \n",
"ID_37f32aed2 0.0 0.0 \n",
"ID_1b9de2922 0.0 0.0 \n",
"ID_d61a6a7b9 0.0 0.0 \n",
"ID_406c82112 0.0 0.0 \n",
"\n",
" ImageOrientationPatient_4 ... StudyID StudyInstanceUID \\\n",
"Image ... \n",
"ID_45785016b 0.993572 ... ID_66929e09d4 \n",
"ID_37f32aed2 0.993572 ... ID_66929e09d4 \n",
"ID_1b9de2922 0.993572 ... ID_66929e09d4 \n",
"ID_d61a6a7b9 0.993572 ... ID_66929e09d4 \n",
"ID_406c82112 0.993572 ... ID_66929e09d4 \n",
"\n",
" WindowCenter WindowWidth any epidural intraparenchymal \\\n",
"Image \n",
"ID_45785016b 30.0 80.0 0.0 0.0 0.0 \n",
"ID_37f32aed2 30.0 80.0 0.0 0.0 0.0 \n",
"ID_1b9de2922 30.0 80.0 0.0 0.0 0.0 \n",
"ID_d61a6a7b9 30.0 80.0 0.0 0.0 0.0 \n",
"ID_406c82112 30.0 80.0 0.0 0.0 0.0 \n",
"\n",
" intraventricular subarachnoid subdural \n",
"Image \n",
"ID_45785016b 0.0 0.0 0.0 \n",
"ID_37f32aed2 0.0 0.0 0.0 \n",
"ID_1b9de2922 0.0 0.0 0.0 \n",
"ID_d61a6a7b9 0.0 0.0 0.0 \n",
"ID_406c82112 0.0 0.0 0.0 \n",
"\n",
"[5 rows x 36 columns]"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"metadata.head()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"test_df = metadata[metadata['Dataset'] == 'test'].iloc[:,:-6].drop(['Dataset'], axis= 1)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"train_df = metadata[metadata['Dataset'] == 'train'].drop(['Dataset'],axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(752802, 6)"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_y = train_df[['any','epidural','intraparenchymal','intraventricular', 'subarachnoid','subdural']]\n",
"train_y.shape"
]
},
{
"cell_type": "code",
"execution_count": 126,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(752802, 35)"
]
},
"execution_count": 126,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_df.shape"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"18938"
]
},
"execution_count": 66,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_df.PatientID.nunique()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Dividing dataset into K-fold with equal patientID"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"with open('df.pkl', 'rb') as f:\n",
" df = pickle.load(f)"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead tr th {\n",
" text-align: left;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th>ID</th>\n",
" <th colspan=\"6\" halign=\"left\">Label</th>\n",
" <th>filepath</th>\n",
" <th>PatientID</th>\n",
" <th>StudyID</th>\n",
" <th>SeriesID</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Subtype</th>\n",
" <th></th>\n",
" <th>any</th>\n",
" <th>epidural</th>\n",
" <th>intraparenchymal</th>\n",
" <th>intraventricular</th>\n",
" <th>subarachnoid</th>\n",
" <th>subdural</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>752798</th>\n",
" <td>ID_ffff82e46</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>/home/ubuntu/kaggle/rsna-intracranial-hemorrha...</td>\n",
" <td>ID_a85c9d08</td>\n",
" <td>ID_eca4bf46ac</td>\n",
" <td>ID_3ef9b97743</td>\n",
" </tr>\n",
" <tr>\n",
" <th>752799</th>\n",
" <td>ID_ffff922b9</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>/home/ubuntu/kaggle/rsna-intracranial-hemorrha...</td>\n",
" <td>ID_5964c5e5</td>\n",
" <td>ID_b47ca0ad05</td>\n",
" <td>ID_6d2a9b2810</td>\n",
" </tr>\n",
" <tr>\n",
" <th>752800</th>\n",
" <td>ID_ffffb670a</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>/home/ubuntu/kaggle/rsna-intracranial-hemorrha...</td>\n",
" <td>ID_4f7414e4</td>\n",
" <td>ID_ffb2e70ba3</td>\n",
" <td>ID_87b33b4a10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>752801</th>\n",
" <td>ID_ffffcbff8</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>/home/ubuntu/kaggle/rsna-intracranial-hemorrha...</td>\n",
" <td>ID_a5382712</td>\n",
" <td>ID_ff0ba45814</td>\n",
" <td>ID_bd174db91c</td>\n",
" </tr>\n",
" <tr>\n",
" <th>752802</th>\n",
" <td>ID_fffff9393</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>/home/ubuntu/kaggle/rsna-intracranial-hemorrha...</td>\n",
" <td>ID_41db05df</td>\n",
" <td>ID_7c887292d5</td>\n",
" <td>ID_dff8d8efd5</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" ID Label \\\n",
"Subtype any epidural intraparenchymal intraventricular \n",
"752798 ID_ffff82e46 0 0 0 0 \n",
"752799 ID_ffff922b9 1 0 0 1 \n",
"752800 ID_ffffb670a 1 0 0 0 \n",
"752801 ID_ffffcbff8 0 0 0 0 \n",
"752802 ID_fffff9393 0 0 0 0 \n",
"\n",
" \\\n",
"Subtype subarachnoid subdural \n",
"752798 0 0 \n",
"752799 0 0 \n",
"752800 1 0 \n",
"752801 0 0 \n",
"752802 0 0 \n",
"\n",
" filepath PatientID \\\n",
"Subtype \n",
"752798 /home/ubuntu/kaggle/rsna-intracranial-hemorrha... ID_a85c9d08 \n",
"752799 /home/ubuntu/kaggle/rsna-intracranial-hemorrha... ID_5964c5e5 \n",
"752800 /home/ubuntu/kaggle/rsna-intracranial-hemorrha... ID_4f7414e4 \n",
"752801 /home/ubuntu/kaggle/rsna-intracranial-hemorrha... ID_a5382712 \n",
"752802 /home/ubuntu/kaggle/rsna-intracranial-hemorrha... ID_41db05df \n",
"\n",
" StudyID SeriesID \n",
"Subtype \n",
"752798 ID_eca4bf46ac ID_3ef9b97743 \n",
"752799 ID_b47ca0ad05 ID_6d2a9b2810 \n",
"752800 ID_ffb2e70ba3 ID_87b33b4a10 \n",
"752801 ID_ff0ba45814 ID_bd174db91c \n",
"752802 ID_7c887292d5 ID_dff8d8efd5 "
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Distribution of number of CT-Scan for Each PatientID"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD4CAYAAAAAczaOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAATdklEQVR4nO3de4xcZ3nH8e9DDLmZxg4JK9d26yAsmhA3t1VimgqtE5o4CcJRlUhGETjIrf8JaqhSgdOKhksiggoEkADVwinhIpY0hMZygNRyvG2plJvJxXZMZEOs4CSNoXZMnUCE6dM/5l2Ybma9F+/OHPv9fqTRznnPe+Y858zMb86+c2YmMhNJUh1e0+sCJEndY+hLUkUMfUmqiKEvSRUx9CWpIjN6XcChnHLKKblgwYJJL//SSy9x4oknTl1BU6SpdUFza2tqXdDc2ppaFzS3tqbWBROrbfPmzT/PzFM7zszMxl7OO++8PBybNm06rOWnS1PrymxubU2tK7O5tTW1rszm1tbUujInVhvwSI6Sqw7vSFJFDH1JqoihL0kVMfQlqSKGviRVxNCXpIoY+pJUEUNfkipi6EtSRRr9NQy1WLD63o7tu269osuVSDraeaQvSRUx9CWpIoa+JFXE0Jekihj6klQRQ1+SKmLoS1JFDH1JqoihL0kVMfQlqSKGviRVxNCXpIoY+pJUEUNfkipi6EtSRQx9SaqIoS9JFTH0Jakihr4kVcTQl6SKGPqSVBFDX5IqYuhLUkUMfUmqyLhDPyKOiYhHI2J9mT4tIh6MiB0R8a2IeF1pP7ZM7yzzF7Tdxo2l/amIuHSqN0aSdGgTOdK/HtjeNv1J4LbMXAjsA1aW9pXAvsx8M3Bb6UdEnAEsB94KLAW+GBHHHF75kqSJGFfoR8Q84Argy2U6gIuAu0qXO4Ary/VlZZoy/+LSfxkwmJmvZObTwE7g/KnYCEnS+ERmjt0p4i7gE8Drgb8BrgUeKEfzRMR84HuZeWZEbAWWZubuMu/HwAXAR8oyXy/ta8syd41Y1ypgFUBfX995g4ODk964AwcOMHPmzEkvP11G1rXl2f0d+y2ae1K3SvqtI2WfNUlTa2tqXdDc2ppaF0ystiVLlmzOzP5O82aMtXBEvBPYk5mbI2JguLlD1xxj3qGW+V1D5hpgDUB/f38ODAyM7DJuQ0NDHM7y02VkXdeuvrdjv13XDHRsn05Hyj5rkqbW1tS6oLm1NbUumLraxgx94ELgXRFxOXAc8HvAZ4FZETEjMw8C84DnSv/dwHxgd0TMAE4C9ra1D2tfRpLUBWOO6WfmjZk5LzMX0Hoj9v7MvAbYBFxVuq0A7inX15Vpyvz7szWGtA5YXs7uOQ1YCDw0ZVsiSRrTeI70R/MhYDAibgYeBdaW9rXA1yJiJ60j/OUAmbktIu4EngQOAtdl5m8OY/2SpAmaUOhn5hAwVK7/hA5n32Tmr4CrR1n+FuCWiRYpSZoafiJXkipi6EtSRQx9SaqIoS9JFTH0Jakihr4kVcTQl6SKGPqSVBFDX5IqYuhLUkUMfUmqiKEvSRUx9CWpIoa+JFXE0Jekihj6klQRQ1+SKmLoS1JFDH1JqoihL0kVMfQlqSKGviRVxNCXpIoY+pJUEUNfkipi6EtSRQx9SaqIoS9JFTH0Jakihr4kVcTQl6SKGPqSVBFDX5IqYuhLUkUMfUmqiKEvSRUZM/Qj4riIeCgiHo+IbRHx0dJ+WkQ8GBE7IuJbEfG60n5smd5Z5i9ou60bS/tTEXHpdG2UJKmz8RzpvwJclJlnAWcDSyNiMfBJ4LbMXAjsA1aW/iuBfZn5ZuC20o+IOANYDrwVWAp8MSKOmcqNkSQd2pihny0HyuRryyWBi4C7SvsdwJXl+rIyTZl/cUREaR/MzFcy82lgJ3D+lGyFJGlcIjPH7tQ6It8MvBn4AvAPwAPlaJ6ImA98LzPPjIitwNLM3F3m/Ri4APhIWebrpX1tWeauEetaBawC6OvrO29wcHDSG3fgwAFmzpw56eWny8i6tjy7v2O/RXNP6lZJv3Wk7LMmaWptTa0LmltbU+uCidW2ZMmSzZnZ32nejPHcQGb+Bjg7ImYB3wFO79St/I1R5o3WPnJda4A1AP39/TkwMDCeEjsaGhricJafLiPrunb1vR377bpmoGP7dDpS9lmTNLW2ptYFza2tqXXB1NU2obN3MvNFYAhYDMyKiOEXjXnAc+X6bmA+QJl/ErC3vb3DMpKkLhjP2TunliN8IuJ44B3AdmATcFXptgK4p1xfV6Yp8+/P1hjSOmB5ObvnNGAh8NBUbYgkaWzjGd6ZA9xRxvVfA9yZmesj4klgMCJuBh4F1pb+a4GvRcROWkf4ywEyc1tE3Ak8CRwErivDRpKkLhkz9DPzCeCcDu0/ocPZN5n5K+DqUW7rFuCWiZcpSZoKfiJXkipi6EtSRQx9SaqIoS9JFTH0Jakihr4kVcTQl6SKGPqSVBFDX5IqYuhLUkUMfUmqiKEvSRUx9CWpIoa+JFXE0Jekihj6klQRQ1+SKmLoS1JFDH1JqoihL0kVMfQlqSKGviRVxNCXpIoY+pJUEUNfkipi6EtSRQx9SaqIoS9JFZnR6wI0dRasvnfUebtuvaKLlUhqKkN/GowWvk0M3pG13rDoINeuvreRtUo6fA7vSFJFDH1JqoihL0kVMfQlqSKGviRVZMzQj4j5EbEpIrZHxLaIuL60nxwRGyJiR/k7u7RHRHw+InZGxBMRcW7bba0o/XdExIrp2yxJUifjOdI/CNyQmacDi4HrIuIMYDWwMTMXAhvLNMBlwMJyWQV8CVovEsBNwAXA+cBNwy8UkqTuGDP0M/P5zPxhuf4/wHZgLrAMuKN0uwO4slxfBnw1Wx4AZkXEHOBSYENm7s3MfcAGYOmUbo0k6ZAmNKYfEQuAc4AHgb7MfB5aLwzAG0u3ucBP2xbbXdpGa5ckdUlk5vg6RswE/g24JTPvjogXM3NW2/x9mTk7Iu4FPpGZPyjtG4EPAhcBx2bmzaX9w8DLmfnpEetZRWtYiL6+vvMGBwcnvXEHDhxg5syZk15+srY8u79j+6K5JwGvrmus/oe73kPd1shl+o6HF3458XVPt17dl+PR1NqaWhc0t7am1gUTq23JkiWbM7O/07xxfQ1DRLwW+Dbwjcy8uzS/EBFzMvP5Mnyzp7TvBua3LT4PeK60D4xoHxq5rsxcA6wB6O/vz4GBgZFdxm1oaIjDWX6yrh3taxiuGQBeXddY/Q93vYe6rZHL3LDoIJ/eMmPC655uvbovx6OptTW1LmhubU2tC6autvGcvRPAWmB7Zn6mbdY6YPgMnBXAPW3t7y1n8SwG9pfhn/uASyJidnkD95LSJknqkvEc6V8IvAfYEhGPlba/BW4F7oyIlcAzwNVl3neBy4GdwMvA+wAyc29EfBx4uPT7WGbunZKtkCSNy5ihX8bmY5TZF3fon8B1o9zW7cDtEylQkjR1/ESuJFXE0Jekihj6klQRQ1+SKuLPJTbYkfSzi5KODB7pS1JFDH1JqoihL0kVMfQlqSKGviRVxNCXpIoY+pJUEUNfkipi6EtSRQx9SaqIoS9JFTH0Jakihr4kVcTQl6SKGPqSVBFDX5IqYuhLUkX85awuGv4lrBsWHeTaUX4VS5Kmk0f6klQRQ1+SKmLoS1JFDH1JqoihL0kVMfQlqSKGviRVxNCXpIoY+pJUEUNfkipi6EtSRQx9SaqIoS9JFRkz9CPi9ojYExFb29pOjogNEbGj/J1d2iMiPh8ROyPiiYg4t22ZFaX/johYMT2bI0k6lPEc6X8FWDqibTWwMTMXAhvLNMBlwMJyWQV8CVovEsBNwAXA+cBNwy8UkqTuGTP0M/Pfgb0jmpcBd5TrdwBXtrV/NVseAGZFxBzgUmBDZu7NzH3ABl79QiJJmmaRmWN3ilgArM/MM8v0i5k5q23+vsycHRHrgVsz8welfSPwIWAAOC4zby7tHwZ+mZmf6rCuVbT+S6Cvr++8wcHBSW/cgQMHmDlz5qSXn6wtz+4/5Py+4+GFX07+9hfNPWnC6x3vMsO1jda/V3p1X45HU2tral3Q3NqaWhdMrLYlS5Zszsz+TvOm+pezokNbHqL91Y2Za4A1AP39/TkwMDDpYoaGhjic5SdrrF/FumHRQT69ZfK7ftc1AxNe73iXGa5ttP690qv7cjyaWltT64Lm1tbUumDqapvs2TsvlGEbyt89pX03ML+t3zzguUO0S5K6aLKhvw4YPgNnBXBPW/t7y1k8i4H9mfk8cB9wSUTMLm/gXlLaJEldNOYYQ0R8k9aY/CkRsZvWWTi3AndGxErgGeDq0v27wOXATuBl4H0Ambk3Ij4OPFz6fSwzR745LEmaZmOGfma+e5RZF3fom8B1o9zO7cDtE6pOkjSl/ESuJFXE0Jekihj6klQRQ1+SKmLoS1JFDH1JqoihL0kVMfQlqSKGviRVxNCXpIoY+pJUEUNfkipi6EtSRab6l7PUBQvG+GUuSRqNR/qSVBFDX5Iq4vBOJaZ7SGi029916xXTul5JE+ORviRVxNCXpIoY+pJUEUNfkipi6EtSRQx9SaqIoS9JFTH0Jakihr4kVcRP5KojP2ErHZ080pekihj6klQRh3fUEw4fSb1h6Etd4gudmsDhHUmqiKEvSRVxeEeapOHhmhsWHeTatqGb6R6umegwkcNKamfot/HJMfX8xa7Jm+591w2jbcNXlp7Y5Uo0zNBXVQ4VpEf6C8XIbRv5H4gEPQj9iFgKfA44BvhyZt7a7RokTc7R/J9VLboa+hFxDPAF4M+A3cDDEbEuM5/sZh0T5QP9d3o1XHMkDQdMdB81cRinadswVc/BXj6Xm5Ij3T7SPx/YmZk/AYiIQWAZ0OjQH00Tn6xHqy3P7p/QUMVU3jdH6/3cxO3qVU2jvSl/KFMV1t1+MYjMnJYb7riyiKuApZn5F2X6PcAFmfn+tj6rgFVl8i3AU4exylOAnx/G8tOlqXVBc2tral3Q3NqaWhc0t7am1gUTq+0PM/PUTjO6faQfHdr+36tOZq4B1kzJyiIeycz+qbitqdTUuqC5tTW1LmhubU2tC5pbW1PrgqmrrdsfztoNzG+bngc81+UaJKla3Q79h4GFEXFaRLwOWA6s63INklStrg7vZObBiHg/cB+tUzZvz8xt07jKKRkmmgZNrQuaW1tT64Lm1tbUuqC5tTW1LpiqYe9uvpErSeotv3BNkipi6EtSRY6K0I+I+RGxKSK2R8S2iLi+tJ8cERsiYkf5O7sHtR0XEQ9FxOOlto+W9tMi4sFS27fKG9tdFxHHRMSjEbG+YXXtiogtEfFYRDxS2ppwf86KiLsi4kfl8fa2htT1lrKvhi+/iIgPNKS2vy6P/a0R8c3ynGjK4+z6Ute2iPhAaev6PouI2yNiT0RsbWvrWEe0fD4idkbEExFx7kTWdVSEPnAQuCEzTwcWA9dFxBnAamBjZi4ENpbpbnsFuCgzzwLOBpZGxGLgk8BtpbZ9wMoe1AZwPbC9bbopdQEsycyz285NbsL9+Tng+5n5R8BZtPZdz+vKzKfKvjobOA94GfhOr2uLiLnAXwH9mXkmrRM4ltOAx1lEnAn8Ja1vCjgLeGdELKQ3++wrwNIRbaPVcRmwsFxWAV+a0Joy86i7APfQ+n6fp4A5pW0O8FSP6zoB+CFwAa1P1s0o7W8D7utBPfPKg+kiYD2tD8/1vK6y7l3AKSPaenp/Ar8HPE05AaIpdXWo8xLgP5tQGzAX+ClwMq2zBdcDlzbhcQZcTetLH4enPwx8sFf7DFgAbB3rcQX8I/DuTv3GczlajvR/KyIWAOcADwJ9mfk8QPn7xh7VdExEPAbsATYAPwZezMyDpctuWk+ObvssrQf5/5bpNzSkLmh9UvtfI2Jz+WoO6P39+SbgZ8A/lSGxL0fEiQ2oa6TlwDfL9Z7WlpnPAp8CngGeB/YDm2nG42wr8PaIeENEnABcTuvDo025P0erY/iFdNiE9t9RFfoRMRP4NvCBzPxFr+sZlpm/yda/3fNo/St5eqdu3awpIt4J7MnMze3NHbr26pzeCzPzXFr/yl4XEW/vUR3tZgDnAl/KzHOAl+jNENOoytj4u4B/7nUtAGUcehlwGvD7wIm07tORuv44y8zttIaZNgDfBx6nNVTcdIf1PD1qQj8iXksr8L+RmXeX5hciYk6ZP4fWkXbPZOaLwBCt9x1mRcTwh+N68XUUFwLviohdwCCtIZ7PNqAuADLzufJ3D62x6fPp/f25G9idmQ+W6btovQj0uq52lwE/zMwXynSva3sH8HRm/iwzfw3cDfwJzXmcrc3MczPz7cBeYAe932fDRqvjsL7O5qgI/YgIYC2wPTM/0zZrHbCiXF9Ba6y/27WdGhGzyvXjaT0JtgObgKt6VVtm3piZ8zJzAa3hgPsz85pe1wUQESdGxOuHr9Mao95Kj+/PzPwv4KcR8ZbSdDGtrwXv+eOszbv53dAO9L62Z4DFEXFCeZ4O77OeP84AIuKN5e8fAH9Oa9/1ep8NG62OdcB7y1k8i4H9w8NA49LtN0+m6Q2QP6X1780TwGPlcjmtMeqNtF69NwIn96C2PwYeLbVtBf6+tL8JeAjYSetf8WN7uP8GgPVNqavU8Hi5bAP+rrQ34f48G3ik3J//AsxuQl2lthOA/wZOamvreW3AR4Eflcf/14Bjm/A4K7X9B60XoceBi3u1z2i92DwP/JrWkfzK0eqgNbzzBVrvDW6hdWbUuNfl1zBIUkWOiuEdSdL4GPqSVBFDX5IqYuhLUkUMfUmqiKEvSRUx9CWpIv8HcUE1AMKbz9IAAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"tmp = df[\"PatientID\"].value_counts()\n",
"tmp[tmp<100].hist(bins=50);"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"18938\n"
]
}
],
"source": [
"patient_id_train = set(df[\"PatientID\"].unique())\n",
"print(len(patient_id_train))"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Image</th>\n",
" <th>SOPInstanceUID</th>\n",
" <th>PatientID</th>\n",
" <th>any</th>\n",
" <th>epidural</th>\n",
" <th>intraparenchymal</th>\n",
" <th>intraventricular</th>\n",
" <th>subarachnoid</th>\n",
" <th>subdural</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>ID_45785016b</td>\n",
" <td>ID_45785016b</td>\n",
" <td>ID_0002cd41</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>ID_37f32aed2</td>\n",
" <td>ID_37f32aed2</td>\n",
" <td>ID_0002cd41</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>ID_1b9de2922</td>\n",
" <td>ID_1b9de2922</td>\n",
" <td>ID_0002cd41</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>ID_d61a6a7b9</td>\n",
" <td>ID_d61a6a7b9</td>\n",
" <td>ID_0002cd41</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>ID_406c82112</td>\n",
" <td>ID_406c82112</td>\n",
" <td>ID_0002cd41</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Image SOPInstanceUID PatientID any epidural intraparenchymal \\\n",
"0 ID_45785016b ID_45785016b ID_0002cd41 0.0 0.0 0.0 \n",
"1 ID_37f32aed2 ID_37f32aed2 ID_0002cd41 0.0 0.0 0.0 \n",
"2 ID_1b9de2922 ID_1b9de2922 ID_0002cd41 0.0 0.0 0.0 \n",
"3 ID_d61a6a7b9 ID_d61a6a7b9 ID_0002cd41 0.0 0.0 0.0 \n",
"4 ID_406c82112 ID_406c82112 ID_0002cd41 0.0 0.0 0.0 \n",
"\n",
" intraventricular subarachnoid subdural \n",
"0 0.0 0.0 0.0 \n",
"1 0.0 0.0 0.0 \n",
"2 0.0 0.0 0.0 \n",
"3 0.0 0.0 0.0 \n",
"4 0.0 0.0 0.0 "
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train = train_df[['SOPInstanceUID','PatientID','any','epidural','intraparenchymal','intraventricular','subarachnoid','subdural']]\n",
"train.reset_index(inplace=True)\n",
"train.head()"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(752802, 9)\n",
"(752802, 9)\n"
]
}
],
"source": [
"IGNORE_IDS = ['ID_6431af929']\n",
"print(train.shape)\n",
"train = train[~train['SOPInstanceUID'].isin(IGNORE_IDS)]\n",
"print(train.shape)"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(18938,)\n"
]
}
],
"source": [
"patient_id = train[\"PatientID\"].unique() \n",
"print(patient_id.shape) "
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(151865, 10)\n",
"(148063, 10)\n",
"(151306, 10)\n",
"(150597, 10)\n",
"(150971, 10)\n"
]
}
],
"source": [
"from sklearn.model_selection import KFold\n",
"\n",
"seed = 2020\n",
"n_splits = 5\n",
"kf = KFold(n_splits=n_splits, shuffle=True, random_state=seed) # StratifiedKFold\n",
"\n",
"fold = 0\n",
"for train_index, valid_index in kf.split(patient_id):\n",
" df_train = train[train[\"PatientID\"].isin(patient_id[train_index])]\n",
" df_valid = train[train[\"PatientID\"].isin(patient_id[valid_index])]\n",
" \n",
" df_train.to_csv(\"train_{}.csv\".format(fold), index=None)\n",
" df_valid.to_csv(\"valid_{}.csv\".format(fold), index=None)\n",
" \n",
" fold += 1\n",
" print(df_valid.shape)"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Image</th>\n",
" <th>SOPInstanceUID</th>\n",
" <th>PatientID</th>\n",
" <th>any</th>\n",
" <th>epidural</th>\n",
" <th>intraparenchymal</th>\n",
" <th>intraventricular</th>\n",
" <th>subarachnoid</th>\n",
" <th>subdural</th>\n",
" <th>set</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>36</th>\n",
" <td>ID_138d275c8</td>\n",
" <td>ID_138d275c8</td>\n",
" <td>ID_00054f3f</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37</th>\n",
" <td>ID_447fa09d9</td>\n",
" <td>ID_447fa09d9</td>\n",
" <td>ID_00054f3f</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>38</th>\n",
" <td>ID_0f1298f68</td>\n",
" <td>ID_0f1298f68</td>\n",
" <td>ID_00054f3f</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>39</th>\n",
" <td>ID_c24918b79</td>\n",
" <td>ID_c24918b79</td>\n",
" <td>ID_00054f3f</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>40</th>\n",
" <td>ID_c0005a263</td>\n",
" <td>ID_c0005a263</td>\n",
" <td>ID_00054f3f</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>752797</th>\n",
" <td>ID_72e823e2c</td>\n",
" <td>ID_72e823e2c</td>\n",
" <td>ID_fffc2bd6</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>752798</th>\n",
" <td>ID_4184c4f03</td>\n",
" <td>ID_4184c4f03</td>\n",
" <td>ID_fffc2bd6</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>752799</th>\n",
" <td>ID_a8aca4f40</td>\n",
" <td>ID_a8aca4f40</td>\n",
" <td>ID_fffc2bd6</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>752800</th>\n",
" <td>ID_716b72762</td>\n",
" <td>ID_716b72762</td>\n",
" <td>ID_fffc2bd6</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>752801</th>\n",
" <td>ID_deb85caf0</td>\n",
" <td>ID_deb85caf0</td>\n",
" <td>ID_fffc2bd6</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>601831 rows × 10 columns</p>\n",
"</div>"
],
"text/plain": [
" Image SOPInstanceUID PatientID any epidural \\\n",
"36 ID_138d275c8 ID_138d275c8 ID_00054f3f 0.0 0.0 \n",
"37 ID_447fa09d9 ID_447fa09d9 ID_00054f3f 0.0 0.0 \n",
"38 ID_0f1298f68 ID_0f1298f68 ID_00054f3f 0.0 0.0 \n",
"39 ID_c24918b79 ID_c24918b79 ID_00054f3f 0.0 0.0 \n",
"40 ID_c0005a263 ID_c0005a263 ID_00054f3f 0.0 0.0 \n",
"... ... ... ... ... ... \n",
"752797 ID_72e823e2c ID_72e823e2c ID_fffc2bd6 0.0 0.0 \n",
"752798 ID_4184c4f03 ID_4184c4f03 ID_fffc2bd6 0.0 0.0 \n",
"752799 ID_a8aca4f40 ID_a8aca4f40 ID_fffc2bd6 0.0 0.0 \n",
"752800 ID_716b72762 ID_716b72762 ID_fffc2bd6 0.0 0.0 \n",
"752801 ID_deb85caf0 ID_deb85caf0 ID_fffc2bd6 0.0 0.0 \n",
"\n",
" intraparenchymal intraventricular subarachnoid subdural set \n",
"36 0.0 0.0 0.0 0.0 0 \n",
"37 0.0 0.0 0.0 0.0 0 \n",
"38 0.0 0.0 0.0 0.0 0 \n",
"39 0.0 0.0 0.0 0.0 0 \n",
"40 0.0 0.0 0.0 0.0 0 \n",
"... ... ... ... ... ... \n",
"752797 0.0 0.0 0.0 0.0 0 \n",
"752798 0.0 0.0 0.0 0.0 0 \n",
"752799 0.0 0.0 0.0 0.0 0 \n",
"752800 0.0 0.0 0.0 0.0 0 \n",
"752801 0.0 0.0 0.0 0.0 0 \n",
"\n",
"[601831 rows x 10 columns]"
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_train"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.10"
}
},
"nbformat": 4,
"nbformat_minor": 4
}