765 lines (764 with data), 22.2 kB
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/reina/anaconda3/envs/RSNA/lib/python3.6/importlib/_bootstrap.py:219: ImportWarning: can't resolve package from __spec__ or __package__, falling back on __name__ and __path__\n",
" return f(*args, **kwds)\n",
"/home/reina/anaconda3/envs/RSNA/lib/python3.6/importlib/_bootstrap.py:219: ImportWarning: can't resolve package from __spec__ or __package__, falling back on __name__ and __path__\n",
" return f(*args, **kwds)\n"
]
}
],
"source": [
"from __future__ import absolute_import\n",
"from __future__ import division\n",
"from __future__ import print_function\n",
"\n",
"import glob, pylab, pandas as pd\n",
"import pydicom, numpy as np\n",
"from os import listdir\n",
"from os.path import isfile, join\n",
"import matplotlib.pylab as plt\n",
"import os\n",
"import seaborn as sns\n",
"import warnings\n",
"warnings.filterwarnings(action='once')\n",
"import pickle\n",
"from tqdm import tqdm, tqdm_notebook\n",
"%load_ext autoreload\n",
"%autoreload 2\n",
"%matplotlib notebook\n",
"from helper import *\n",
"import time\n",
"from IPython.core.interactiveshell import InteractiveShell\n",
"InteractiveShell.ast_node_interactivity = \"all\"\n",
"from defenitions import *"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {},
"outputs": [],
"source": [
"train_base_df = pd.read_csv(train_images_dir)"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4045572"
]
},
"execution_count": 67,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_base_df.shape[0]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ID</th>\n",
" <th>Label</th>\n",
" <th>Sub_type</th>\n",
" <th>PatientID</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>ID_28fbab7eb_epidural</td>\n",
" <td>0.5</td>\n",
" <td>epidural</td>\n",
" <td>28fbab7eb</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>ID_28fbab7eb_intraparenchymal</td>\n",
" <td>0.5</td>\n",
" <td>intraparenchymal</td>\n",
" <td>28fbab7eb</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>ID_28fbab7eb_intraventricular</td>\n",
" <td>0.5</td>\n",
" <td>intraventricular</td>\n",
" <td>28fbab7eb</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>ID_28fbab7eb_subarachnoid</td>\n",
" <td>0.5</td>\n",
" <td>subarachnoid</td>\n",
" <td>28fbab7eb</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>ID_28fbab7eb_subdural</td>\n",
" <td>0.5</td>\n",
" <td>subdural</td>\n",
" <td>28fbab7eb</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" ID Label Sub_type PatientID\n",
"0 ID_28fbab7eb_epidural 0.5 epidural 28fbab7eb\n",
"1 ID_28fbab7eb_intraparenchymal 0.5 intraparenchymal 28fbab7eb\n",
"2 ID_28fbab7eb_intraventricular 0.5 intraventricular 28fbab7eb\n",
"3 ID_28fbab7eb_subarachnoid 0.5 subarachnoid 28fbab7eb\n",
"4 ID_28fbab7eb_subdural 0.5 subdural 28fbab7eb"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_base_df['Sub_type'] = train_base_df['ID'].str.split(\"_\", n = 3, expand = True)[2]\n",
"train_base_df['PatientID'] = train_base_df['ID'].str.split(\"_\", n = 3, expand = True)[1]\n",
"train_base_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['epidural', 'intraparenchymal', 'intraventricular', 'subarachnoid',\n",
" 'subdural', 'any'], dtype=object)"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sub_types=train_base_df.Sub_type.unique()\n",
"sub_types"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "ae4afd5b0d824567b34dd59b1b81e430",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=6), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"dfs =[]\n",
"for sub_type in tqdm_notebook(sub_types):\n",
" df = train_base_df[train_base_df.Sub_type==sub_type][['PatientID','Label']].copy()\n",
" df=df.rename(columns={\"Label\": sub_type}).reset_index(drop=True)\n",
" dfs.append(df)\n",
"train_df=dfs[0]\n",
"for df in tqdm_notebook(dfs[1:]):\n",
" train_df=test_df.merge(df,on='PatientID')"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "5fe21e059b0f4fff9decf2a356119816",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=78545), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"PID = np.zeros(train_df.shape[0],dtype=object)\n",
"StudyI = np.zeros(train_df.shape[0],dtype=object)\n",
"SeriesI = np.zeros(train_df.shape[0],dtype=object)\n",
"WindowCenter = np.zeros(train_df.shape[0],dtype=object)\n",
"WindowWidth = np.zeros(train_df.shape[0],dtype=object)\n",
"ImagePositionX = np.zeros(train_df.shape[0],dtype=np.float)\n",
"ImagePositionY = np.zeros(train_df.shape[0],dtype=np.float)\n",
"ImagePositionZ = np.zeros(train_df.shape[0],dtype=np.float)\n",
"\n",
"for i,row in tqdm_notebook(train_df.iterrows(),total=train_df.shape[0]):\n",
" ds = pydicom.dcmread(train_images_dir + 'ID_{}.dcm'.format(row['PatientID']))\n",
" SeriesI[i]=ds.SeriesInstanceUID\n",
" PID[i]=ds.PatientID\n",
" StudyI[i]=ds.StudyInstanceUID \n",
" WindowCenter[i]=ds.WindowCenter\n",
" WindowWidth[i]=ds.WindowWidth\n",
" ImagePositionX[i]=float(ds.ImagePositionPatient[0])\n",
" ImagePositionY[i]=float(ds.ImagePositionPatient[1])\n",
" ImagePositionZ[i]=float(ds.ImagePositionPatient[2])\n",
"train_df['SeriesI']=SeriesI\n",
"train_df['PID']=PID\n",
"train_df['StudyI']=StudyI\n",
"train_df['WindowCenter']=WindowCenter\n",
"train_df['WindowWidth']=WindowWidth\n",
"train_df['ImagePositionZ']=ImagePositionZ\n",
"train_df['ImagePositionX']=ImagePositionX\n",
"train_df['ImagePositionY']=ImagePositionY\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"train_df.to_csv(data_dir+'train.csv',index=False)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ID</th>\n",
" <th>Label</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>ID_28fbab7eb_epidural</td>\n",
" <td>0.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>ID_28fbab7eb_intraparenchymal</td>\n",
" <td>0.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>ID_28fbab7eb_intraventricular</td>\n",
" <td>0.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>ID_28fbab7eb_subarachnoid</td>\n",
" <td>0.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>ID_28fbab7eb_subdural</td>\n",
" <td>0.5</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" ID Label\n",
"0 ID_28fbab7eb_epidural 0.5\n",
"1 ID_28fbab7eb_intraparenchymal 0.5\n",
"2 ID_28fbab7eb_intraventricular 0.5\n",
"3 ID_28fbab7eb_subarachnoid 0.5\n",
"4 ID_28fbab7eb_subdural 0.5"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sample_submission=pd.read_csv(data_dir+'stage_1_sample_submission.csv')\n",
"sample_submission.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ID</th>\n",
" <th>Label</th>\n",
" <th>Sub_type</th>\n",
" <th>PatientID</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>ID_28fbab7eb_epidural</td>\n",
" <td>0.5</td>\n",
" <td>epidural</td>\n",
" <td>28fbab7eb</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>ID_28fbab7eb_intraparenchymal</td>\n",
" <td>0.5</td>\n",
" <td>intraparenchymal</td>\n",
" <td>28fbab7eb</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>ID_28fbab7eb_intraventricular</td>\n",
" <td>0.5</td>\n",
" <td>intraventricular</td>\n",
" <td>28fbab7eb</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>ID_28fbab7eb_subarachnoid</td>\n",
" <td>0.5</td>\n",
" <td>subarachnoid</td>\n",
" <td>28fbab7eb</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>ID_28fbab7eb_subdural</td>\n",
" <td>0.5</td>\n",
" <td>subdural</td>\n",
" <td>28fbab7eb</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" ID Label Sub_type PatientID\n",
"0 ID_28fbab7eb_epidural 0.5 epidural 28fbab7eb\n",
"1 ID_28fbab7eb_intraparenchymal 0.5 intraparenchymal 28fbab7eb\n",
"2 ID_28fbab7eb_intraventricular 0.5 intraventricular 28fbab7eb\n",
"3 ID_28fbab7eb_subarachnoid 0.5 subarachnoid 28fbab7eb\n",
"4 ID_28fbab7eb_subdural 0.5 subdural 28fbab7eb"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test_base_df=sample_submission.copy()\n",
"test_base_df['Sub_type'] = test_base_df['ID'].str.split(\"_\", n = 3, expand = True)[2]\n",
"test_base_df['PatientID'] = test_base_df['ID'].str.split(\"_\", n = 3, expand = True)[1]\n",
"test_base_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 234,
"metadata": {},
"outputs": [],
"source": [
"test_ids=test_df.PatientID.unique()"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['epidural', 'intraparenchymal', 'intraventricular', 'subarachnoid',\n",
" 'subdural', 'any'], dtype=object)"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sub_types=test_base_df.Sub_type.unique()\n",
"sub_types"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f153620a2b61441bb7cb7f1acf392844",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=6), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "189e8ac701514c2c963161608d43a459",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=5), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"dfs =[]\n",
"for sub_type in tqdm_notebook(sub_types):\n",
" df = test_base_df[test_base_df.Sub_type==sub_type][['PatientID','Label']].copy()\n",
" df=df.rename(columns={\"Label\": sub_type}).reset_index(drop=True)\n",
" dfs.append(df)\n",
"test_df=dfs[0]\n",
"for df in tqdm_notebook(dfs[1:]):\n",
" test_df=test_df.merge(df,on='PatientID')"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>PatientID</th>\n",
" <th>epidural</th>\n",
" <th>intraparenchymal</th>\n",
" <th>intraventricular</th>\n",
" <th>subarachnoid</th>\n",
" <th>subdural</th>\n",
" <th>any</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>28fbab7eb</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>877923b8b</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>a591477cb</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>42217c898</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>a130c4d2f</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" PatientID epidural intraparenchymal intraventricular subarachnoid \\\n",
"0 28fbab7eb 0.5 0.5 0.5 0.5 \n",
"1 877923b8b 0.5 0.5 0.5 0.5 \n",
"2 a591477cb 0.5 0.5 0.5 0.5 \n",
"3 42217c898 0.5 0.5 0.5 0.5 \n",
"4 a130c4d2f 0.5 0.5 0.5 0.5 \n",
"\n",
" subdural any \n",
"0 0.5 0.5 \n",
"1 0.5 0.5 \n",
"2 0.5 0.5 \n",
"3 0.5 0.5 \n",
"4 0.5 0.5 "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "5fe21e059b0f4fff9decf2a356119816",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=78545), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"PID = np.zeros(test_df.shape[0],dtype=object)\n",
"StudyI = np.zeros(test_df.shape[0],dtype=object)\n",
"SeriesI = np.zeros(test_df.shape[0],dtype=object)\n",
"WindowCenter = np.zeros(test_df.shape[0],dtype=object)\n",
"WindowWidth = np.zeros(test_df.shape[0],dtype=object)\n",
"ImagePositionX = np.zeros(test_df.shape[0],dtype=np.float)\n",
"ImagePositionY = np.zeros(test_df.shape[0],dtype=np.float)\n",
"ImagePositionZ = np.zeros(test_df.shape[0],dtype=np.float)\n",
"\n",
"for i,row in tqdm_notebook(test_df.iterrows(),total=test_df.shape[0]):\n",
" ds = pydicom.dcmread(test_images_dir + 'ID_{}.dcm'.format(row['PatientID']))\n",
" SeriesI[i]=ds.SeriesInstanceUID\n",
" PID[i]=ds.PatientID\n",
" StudyI[i]=ds.StudyInstanceUID \n",
" WindowCenter[i]=ds.WindowCenter\n",
" WindowWidth[i]=ds.WindowWidth\n",
" ImagePositionX[i]=float(ds.ImagePositionPatient[0])\n",
" ImagePositionY[i]=float(ds.ImagePositionPatient[1])\n",
" ImagePositionZ[i]=float(ds.ImagePositionPatient[2])\n",
"test_df['SeriesI']=SeriesI\n",
"test_df['PID']=PID\n",
"test_df['StudyI']=StudyI\n",
"test_df['WindowCenter']=WindowCenter\n",
"test_df['WindowWidth']=WindowWidth\n",
"test_df['ImagePositionZ']=ImagePositionZ\n",
"test_df['ImagePositionX']=ImagePositionX\n",
"test_df['ImagePositionY']=ImagePositionY\n"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"test_df.to_csv(data_dir+'test.csv',index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}