{ "cells": [
 { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [
   "/home/reina/anaconda3/envs/RSNA/lib/python3.6/importlib/_bootstrap.py:219: ImportWarning: can't resolve package from __spec__ or __package__, falling back on __name__ and __path__\n",
   " return f(*args, **kwds)\n",
   "/home/reina/anaconda3/envs/RSNA/lib/python3.6/importlib/_bootstrap.py:219: ImportWarning: can't resolve package from __spec__ or __package__, falling back on __name__ and __path__\n",
   " return f(*args, **kwds)\n" ] } ],
  "source": [
   "from __future__ import absolute_import\n",
   "from __future__ import division\n",
   "from __future__ import print_function\n",
   "\n",
   "import glob, pylab, pandas as pd\n",
   "import pydicom, numpy as np\n",
   "from os import listdir\n",
   "from os.path import isfile, join\n",
   "import matplotlib.pylab as plt\n",
   "import os\n",
   "import seaborn as sns\n",
   "import warnings\n",
   "warnings.filterwarnings(action='once')\n",
   "import pickle\n",
   "from tqdm import tqdm, tqdm_notebook\n",
   "%load_ext autoreload\n",
   "%autoreload 2\n",
   "%matplotlib notebook\n",
   "from helper import *\n",
   "import time\n",
   "from IPython.core.interactiveshell import InteractiveShell\n",
   "InteractiveShell.ast_node_interactivity = \"all\"\n",
   "from defenitions import *" ] },
 { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [],
  "source": [
   "# Dataset root. Set the RSNA_DATA_DIR environment variable to run on another machine;\n",
   "# without it the original location is used.\n",
   "# os.path.join(..., '') guarantees the trailing separator that the concatenations below rely on.\n",
   "data_dir = os.path.join(os.environ.get('RSNA_DATA_DIR', '/media/nvme/data/RSNA/'), '')\n",
   "train_images_dir = data_dir + 'stage_1_train_images/'\n",
   "test_images_dir = data_dir + 'stage_1_test_images/'" ] },
 { "cell_type": "code", "execution_count": 65, "metadata": {}, "outputs": [],
  "source": [
   "# Long-format training labels: one row per ID string with its Label.\n",
   "train_base_df = pd.read_csv(data_dir + 'stage_1_train.csv')" ] },
 { "cell_type": "code", "execution_count": 67, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "4045572" ] }, "execution_count": 67, "metadata": {}, "output_type": "execute_result" } ],
  "source": [
   "train_base_df.shape[0]" ] },
 { "cell_type": "code", "execution_count": 
4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IDLabelSub_typePatientID
0ID_28fbab7eb_epidural0.5epidural28fbab7eb
1ID_28fbab7eb_intraparenchymal0.5intraparenchymal28fbab7eb
2ID_28fbab7eb_intraventricular0.5intraventricular28fbab7eb
3ID_28fbab7eb_subarachnoid0.5subarachnoid28fbab7eb
4ID_28fbab7eb_subdural0.5subdural28fbab7eb
\n", "
" ], "text/plain": [ " ID Label Sub_type PatientID\n", "0 ID_28fbab7eb_epidural 0.5 epidural 28fbab7eb\n", "1 ID_28fbab7eb_intraparenchymal 0.5 intraparenchymal 28fbab7eb\n", "2 ID_28fbab7eb_intraventricular 0.5 intraventricular 28fbab7eb\n", "3 ID_28fbab7eb_subarachnoid 0.5 subarachnoid 28fbab7eb\n", "4 ID_28fbab7eb_subdural 0.5 subdural 28fbab7eb" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ],
  "source": [
   "# IDs look like 'ID_<image>_<sub_type>'. Split once and reuse the pieces for both columns\n",
   "# (the split was previously computed twice over the whole frame).\n",
   "id_parts = train_base_df['ID'].str.split(\"_\", n = 3, expand = True)\n",
   "train_base_df['Sub_type'] = id_parts[2]\n",
   "# 'PatientID' holds the middle ID token; it is later used to build the DICOM file name.\n",
   "train_base_df['PatientID'] = id_parts[1]\n",
   "train_base_df.head()" ] },
 { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['epidural', 'intraparenchymal', 'intraventricular', 'subarachnoid',\n", " 'subdural', 'any'], dtype=object)" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ],
  "source": [
   "sub_types=train_base_df.Sub_type.unique()\n",
   "sub_types" ] },
 { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "ae4afd5b0d824567b34dd59b1b81e430", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, max=6), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n" ] } ],
  "source": [
   "# Pivot the long label table into one row per PatientID with one column per sub-type.\n",
   "dfs = []\n",
   "for sub_type in tqdm_notebook(sub_types):\n",
   "    df = train_base_df[train_base_df.Sub_type==sub_type][['PatientID','Label']].copy()\n",
   "    # Keep one row per PatientID: a key repeated within a sub-type would be multiplied by\n",
   "    # every inner merge below. NOTE(review): confirm whether stage_1_train.csv repeats IDs.\n",
   "    df = df.drop_duplicates(subset='PatientID')\n",
   "    df = df.rename(columns={\"Label\": sub_type}).reset_index(drop=True)\n",
   "    dfs.append(df)\n",
   "train_df = dfs[0]\n",
   "for df in tqdm_notebook(dfs[1:]):\n",
   "    # Accumulate into train_df itself; this line previously merged into test_df, which is\n",
   "    # undefined at this point on a fresh kernel and test data on a stale one.\n",
   "    train_df = train_df.merge(df, on='PatientID')" ] },
 { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": 
"5fe21e059b0f4fff9decf2a356119816", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, max=78545), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n" ] } ],
  "source": [
   "# Per-image DICOM header fields, filled by position and attached to train_df below.\n",
   "PID = np.zeros(train_df.shape[0], dtype=object)\n",
   "StudyI = np.zeros(train_df.shape[0], dtype=object)\n",
   "SeriesI = np.zeros(train_df.shape[0], dtype=object)\n",
   "# Window values stay dtype=object: they are stored exactly as pydicom returns them.\n",
   "WindowCenter = np.zeros(train_df.shape[0], dtype=object)\n",
   "WindowWidth = np.zeros(train_df.shape[0], dtype=object)\n",
   "# Builtin float is the dtype the np.float alias pointed to; the alias was deprecated in\n",
   "# NumPy 1.20 and removed in 1.24.\n",
   "ImagePositionX = np.zeros(train_df.shape[0], dtype=float)\n",
   "ImagePositionY = np.zeros(train_df.shape[0], dtype=float)\n",
   "ImagePositionZ = np.zeros(train_df.shape[0], dtype=float)\n",
   "\n",
   "# enumerate() supplies array positions, so the fill does not depend on train_df's index labels.\n",
   "for i, patient_id in enumerate(tqdm_notebook(train_df['PatientID'], total=train_df.shape[0])):\n",
   "    # Only header tags are read below, so skip loading the pixel data.\n",
   "    ds = pydicom.dcmread(train_images_dir + 'ID_{}.dcm'.format(patient_id), stop_before_pixels=True)\n",
   "    SeriesI[i] = ds.SeriesInstanceUID\n",
   "    PID[i] = ds.PatientID\n",
   "    StudyI[i] = ds.StudyInstanceUID\n",
   "    WindowCenter[i] = ds.WindowCenter\n",
   "    WindowWidth[i] = ds.WindowWidth\n",
   "    ImagePositionX[i] = float(ds.ImagePositionPatient[0])\n",
   "    ImagePositionY[i] = float(ds.ImagePositionPatient[1])\n",
   "    ImagePositionZ[i] = float(ds.ImagePositionPatient[2])\n",
   "\n",
   "# Column order matches the original assignments (Z is attached before X and Y).\n",
   "train_df['SeriesI'] = SeriesI\n",
   "train_df['PID'] = PID\n",
   "train_df['StudyI'] = StudyI\n",
   "train_df['WindowCenter'] = WindowCenter\n",
   "train_df['WindowWidth'] = WindowWidth\n",
   "train_df['ImagePositionZ'] = ImagePositionZ\n",
   "train_df['ImagePositionX'] = ImagePositionX\n",
   "train_df['ImagePositionY'] = ImagePositionY\n" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [],
  "source": [
   "train_df.to_csv(data_dir+'train.csv',index=False)" ] },
 { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IDLabel
0ID_28fbab7eb_epidural0.5
1ID_28fbab7eb_intraparenchymal0.5
2ID_28fbab7eb_intraventricular0.5
3ID_28fbab7eb_subarachnoid0.5
4ID_28fbab7eb_subdural0.5
\n", "
" ], "text/plain": [ " ID Label\n", "0 ID_28fbab7eb_epidural 0.5\n", "1 ID_28fbab7eb_intraparenchymal 0.5\n", "2 ID_28fbab7eb_intraventricular 0.5\n", "3 ID_28fbab7eb_subarachnoid 0.5\n", "4 ID_28fbab7eb_subdural 0.5" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ],
  "source": [
   "# Load the sample submission (columns ID and Label) and preview its first rows.\n",
   "sample_submission = pd.read_csv(data_dir + 'stage_1_sample_submission.csv')\n",
   "sample_submission.head(5)" ] },
 { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IDLabelSub_typePatientID
0ID_28fbab7eb_epidural0.5epidural28fbab7eb
1ID_28fbab7eb_intraparenchymal0.5intraparenchymal28fbab7eb
2ID_28fbab7eb_intraventricular0.5intraventricular28fbab7eb
3ID_28fbab7eb_subarachnoid0.5subarachnoid28fbab7eb
4ID_28fbab7eb_subdural0.5subdural28fbab7eb
\n", "
" ], "text/plain": [ " ID Label Sub_type PatientID\n", "0 ID_28fbab7eb_epidural 0.5 epidural 28fbab7eb\n", "1 ID_28fbab7eb_intraparenchymal 0.5 intraparenchymal 28fbab7eb\n", "2 ID_28fbab7eb_intraventricular 0.5 intraventricular 28fbab7eb\n", "3 ID_28fbab7eb_subarachnoid 0.5 subarachnoid 28fbab7eb\n", "4 ID_28fbab7eb_subdural 0.5 subdural 28fbab7eb" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ],
  "source": [
   "test_base_df = sample_submission.copy()\n",
   "# IDs look like 'ID_<image>_<sub_type>'. Split once and reuse the pieces for both columns\n",
   "# (the split was previously computed twice).\n",
   "id_parts = test_base_df['ID'].str.split(\"_\", n = 3, expand = True)\n",
   "test_base_df['Sub_type'] = id_parts[2]\n",
   "# 'PatientID' holds the middle ID token; it is later used to build the DICOM file name.\n",
   "test_base_df['PatientID'] = id_parts[1]\n",
   "test_base_df.head()" ] },
 { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['epidural', 'intraparenchymal', 'intraventricular', 'subarachnoid',\n", " 'subdural', 'any'], dtype=object)" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ],
  "source": [
   "sub_types=test_base_df.Sub_type.unique()\n",
   "sub_types" ] },
 { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "f153620a2b61441bb7cb7f1acf392844", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, max=6), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "189e8ac701514c2c963161608d43a459", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, max=5), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n" ] } ],
  "source": [
   "# Pivot the long table into one row per PatientID with one column per sub-type.\n",
   "dfs = []\n",
   "for sub_type in tqdm_notebook(sub_types):\n",
   "    df = test_base_df[test_base_df.Sub_type==sub_type][['PatientID','Label']].copy()\n",
   "    df = df.rename(columns={\"Label\": sub_type}).reset_index(drop=True)\n",
   "    dfs.append(df)\n",
   "test_df = dfs[0]\n",
   "for df in tqdm_notebook(dfs[1:]):\n",
   "    test_df = test_df.merge(df, on='PatientID')" ] },
 { "cell_type": "code", "execution_count": 234, "metadata": {}, "outputs": [],
  "source": [
   "# Placed after the pivot because it reads test_df; it previously sat above the cell that\n",
   "# creates test_df and failed on a fresh kernel.\n",
   "test_ids=test_df.PatientID.unique()" ] },
 { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PatientIDepiduralintraparenchymalintraventricularsubarachnoidsubduralany
028fbab7eb0.50.50.50.50.50.5
1877923b8b0.50.50.50.50.50.5
2a591477cb0.50.50.50.50.50.5
342217c8980.50.50.50.50.50.5
4a130c4d2f0.50.50.50.50.50.5
\n", "
" ], "text/plain": [ " PatientID epidural intraparenchymal intraventricular subarachnoid \\\n", "0 28fbab7eb 0.5 0.5 0.5 0.5 \n", "1 877923b8b 0.5 0.5 0.5 0.5 \n", "2 a591477cb 0.5 0.5 0.5 0.5 \n", "3 42217c898 0.5 0.5 0.5 0.5 \n", "4 a130c4d2f 0.5 0.5 0.5 0.5 \n", "\n", " subdural any \n", "0 0.5 0.5 \n", "1 0.5 0.5 \n", "2 0.5 0.5 \n", "3 0.5 0.5 \n", "4 0.5 0.5 " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ],
  "source": [
   "test_df.head()" ] },
 { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "5fe21e059b0f4fff9decf2a356119816", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, max=78545), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n" ] } ],
  "source": [
   "# Per-image DICOM header fields, filled by position and attached to test_df below.\n",
   "PID = np.zeros(test_df.shape[0], dtype=object)\n",
   "StudyI = np.zeros(test_df.shape[0], dtype=object)\n",
   "SeriesI = np.zeros(test_df.shape[0], dtype=object)\n",
   "# Window values stay dtype=object: they are stored exactly as pydicom returns them.\n",
   "WindowCenter = np.zeros(test_df.shape[0], dtype=object)\n",
   "WindowWidth = np.zeros(test_df.shape[0], dtype=object)\n",
   "# Builtin float is the dtype the np.float alias pointed to; the alias was deprecated in\n",
   "# NumPy 1.20 and removed in 1.24.\n",
   "ImagePositionX = np.zeros(test_df.shape[0], dtype=float)\n",
   "ImagePositionY = np.zeros(test_df.shape[0], dtype=float)\n",
   "ImagePositionZ = np.zeros(test_df.shape[0], dtype=float)\n",
   "\n",
   "# enumerate() supplies array positions, so the fill does not depend on test_df's index labels.\n",
   "for i, patient_id in enumerate(tqdm_notebook(test_df['PatientID'], total=test_df.shape[0])):\n",
   "    # Only header tags are read below, so skip loading the pixel data.\n",
   "    ds = pydicom.dcmread(test_images_dir + 'ID_{}.dcm'.format(patient_id), stop_before_pixels=True)\n",
   "    SeriesI[i] = ds.SeriesInstanceUID\n",
   "    PID[i] = ds.PatientID\n",
   "    StudyI[i] = ds.StudyInstanceUID\n",
   "    WindowCenter[i] = ds.WindowCenter\n",
   "    WindowWidth[i] = ds.WindowWidth\n",
   "    ImagePositionX[i] = float(ds.ImagePositionPatient[0])\n",
   "    ImagePositionY[i] = float(ds.ImagePositionPatient[1])\n",
   "    ImagePositionZ[i] = float(ds.ImagePositionPatient[2])\n",
   "\n",
   "# Column order matches the original assignments (Z is attached before X and Y).\n",
   "test_df['SeriesI'] = SeriesI\n",
   "test_df['PID'] = PID\n",
   "test_df['StudyI'] = StudyI\n",
   "test_df['WindowCenter'] = WindowCenter\n",
   "test_df['WindowWidth'] = WindowWidth\n",
   "test_df['ImagePositionZ'] = ImagePositionZ\n",
   "test_df['ImagePositionX'] = ImagePositionX\n",
   "test_df['ImagePositionY'] = ImagePositionY\n" ] },
 { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [],
  "source": [
   "test_df.to_csv(data_dir+'test.csv',index=False)" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }
 ],
 "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.6" } },
 "nbformat": 4, "nbformat_minor": 2 }