# %% [markdown]
# # Patient-level K-fold split (RSNA Intracranial Hemorrhage Detection)
#
# Builds `train_{k}.csv` / `valid_{k}.csv` so that all slices of a given
# `PatientID` fall into the same fold.
#
# NOTE(review): the previously saved outputs were produced by out-of-order
# execution (e.g. the fold shapes showed a 10th column, `set`, that no cell in
# this notebook creates). The cells below are ordered so that
# Restart Kernel -> Run All reproduces every result from scratch.

# %%
# --- Standard library -------------------------------------------------------
import datetime
import glob
import os
import pickle
import random
import sys
import zipfile
from glob import glob  # NOTE: rebinds the name `glob` from the module to the function

# --- Third-party ------------------------------------------------------------
import albumentations as A
import click
import cv2
import keras
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pydicom
import tensorflow as tf
from joblib import Parallel, delayed
from keras.applications.inception_v3 import InceptionV3
from keras.layers import Conv2D, Dense, Flatten, GlobalAveragePooling2D, MaxPooling2D
from keras.models import Model, Sequential, model_from_json
from keras.preprocessing import image
from PIL import Image
from pydicom.filebase import DicomBytesIO
from scipy import ndimage
from skimage import exposure
from sklearn.model_selection import KFold, train_test_split
from tqdm import tqdm

# %%
# --- Configuration ----------------------------------------------------------
# Dataset root. Every other path is derived from it so that relocating the
# data only requires editing this one line.
base_url = '/home/ubuntu/kaggle/rsna-intracranial-hemorrhage-detection/'
TRAIN_DIR = os.path.join(base_url, 'stage_2_train', '')
TEST_DIR = os.path.join(base_url, 'stage_2_test', '')
train_url = os.path.join(base_url, 'png', 'train', 'adjacent-brain-cropped', '')
dcm_url = TRAIN_DIR  # same directory as TRAIN_DIR; both names are kept

# The six label columns, in the order used for `train_y` and the fold CSVs.
LABEL_COLS = ['any', 'epidural', 'intraparenchymal', 'intraventricular',
              'subarachnoid', 'subdural']

# Image excluded from the study.
# NOTE(review): presumably an unreadable/corrupt DICOM -- TODO confirm.
IGNORE_IDS = ['ID_6431af929']

seed = 2020    # random_state of the KFold shuffle
n_splits = 5   # number of folds

# Fails fast (FileNotFoundError) when the dataset root is missing.
os.listdir(base_url)

# %% [markdown]
# ## Load labels
#
# `stage_2_train.csv` has one row per (image, diagnosis) pair, with `ID`
# values such as `ID_000012eaf_any`. Split the ID into its image part and its
# diagnosis part, then pivot to one row per image.

# %%
labels_long = pd.read_csv(os.path.join(base_url, 'stage_2_train.csv')).drop_duplicates()
labels_long['ImageID'] = labels_long['ID'].str.slice(stop=12)     # e.g. "ID_000012eaf"
labels_long['Diagnosis'] = labels_long['ID'].str.slice(start=13)  # e.g. "any"
train_labels = labels_long.pivot(index="ImageID", columns="Diagnosis", values="Label")
train_labels.head()

# %% [markdown]
# ## Load DICOM metadata and attach labels

# %%
# Built without in-place mutation so the cell can be re-run safely.
train_metadata = (
    pd.read_parquet(os.path.join(base_url, 'train_metadata.parquet.gzip'))
    .assign(Dataset="train")
    .join(train_labels)  # index-on-index join; assumes the metadata index is the image ID
)
test_metadata = (
    pd.read_parquet(os.path.join(base_url, 'test_metadata.parquet.gzip'))
    .assign(Dataset="test")
)

# A single sort is enough: the earlier sort on ImagePositionPatient_2 alone was
# immediately superseded by this one. Only the relative order of rows tied on
# both keys can differ from before; fold membership is unaffected.
metadata = (
    pd.concat([train_metadata, test_metadata], sort=True)
    .sort_values(['PatientID', 'ImagePositionPatient_2'])
    .drop(index=IGNORE_IDS)  # raises KeyError if the ID is absent, as before
)
metadata.head()

# %%
# Drop the label columns by name rather than by position (`iloc[:, :-6]`),
# which silently depended on the column order produced by `concat(sort=True)`.
test_df = metadata[metadata['Dataset'] == 'test'].drop(columns=LABEL_COLS + ['Dataset'])
train_df = metadata[metadata['Dataset'] == 'train'].drop(columns=['Dataset'])

# %%
train_y = train_df[LABEL_COLS]
train_y.shape

# %%
train_df.shape

# %%
train_df.PatientID.nunique()

# %% [markdown]
# ## Distribution of the number of CT slices per PatientID
#
# NOTE(review): `df.pkl` is produced outside this notebook; its origin and
# schema are not established here. `pickle.load` can execute arbitrary code, so
# only load a file you created yourself.

# %%
with open('df.pkl', 'rb') as f:
    df = pickle.load(f)
df.head()

# %%
scans_per_patient = df["PatientID"].value_counts()
# Patients with 100 or more slices are left out of the plot to keep the bulk
# of the distribution readable.
ax = scans_per_patient[scans_per_patient < 100].hist(bins=50)
ax.set(title="CT slices per patient (< 100 slices)",
       xlabel="Slices per PatientID",
       ylabel="Number of patients");

# %%
patient_id_train = set(df["PatientID"].unique())
print(len(patient_id_train))

# %% [markdown]
# ## Split the dataset into K folds grouped by PatientID
#
# The folds are drawn over *unique patients*, not over individual slices, so
# no patient contributes images to both the training and the validation part
# of a fold.

# %%
# `reset_index()` turns the image-ID index into a regular column.
train = train_df[['SOPInstanceUID', 'PatientID'] + LABEL_COLS].reset_index()
train.head()

# %%
# Defensive second filter: the ignored image was already dropped from
# `metadata` by index, so both shapes are expected to match.
print(train.shape)
train = train[~train['SOPInstanceUID'].isin(IGNORE_IDS)]
print(train.shape)

# %%
patient_id = train["PatientID"].unique()
print(patient_id.shape)

# %%
kf = KFold(n_splits=n_splits, shuffle=True, random_state=seed)

for fold, (train_index, valid_index) in enumerate(kf.split(patient_id)):
    df_train = train[train["PatientID"].isin(patient_id[train_index])]
    df_valid = train[train["PatientID"].isin(patient_id[valid_index])]

    # Invariants of a patient-level split: no leakage, no lost rows.
    assert set(df_train["PatientID"]).isdisjoint(df_valid["PatientID"])
    assert len(df_train) + len(df_valid) == len(train)

    df_train.to_csv("train_{}.csv".format(fold), index=False)
    df_valid.to_csv("valid_{}.csv".format(fold), index=False)

    print(df_valid.shape)

# %%
# Training part of the last fold (preview only).
df_train.head()