--- a +++ b/DataPreparation.ipynb @@ -0,0 +1,252 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import re\n", + "import numpy as np\n", + "from rt_utils import RTStructBuilder\n", + "import matplotlib.pyplot as plt\n", + "import pydicom \n", + "from PIL import Image\n", + "CT_PATH = './CT_Slices'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def transform_to_hu(medical_image):\n", + " intercept = medical_image.RescaleIntercept\n", + " slope = medical_image.RescaleSlope\n", + " image = medical_image.pixel_array\n", + " hu_image = image * slope + intercept\n", + "\n", + " return hu_image\n", + "\n", + "def apply_windowing(ds, window_center, window_width):\n", + " # Load the DICOM file\n", + " \n", + " # Get the pixel data and rescale to Hounsfield units (HU)\n", + " pixel_array = ds.pixel_array.astype(np.float32)\n", + " intercept = ds.RescaleIntercept\n", + " slope = ds.RescaleSlope\n", + " hu_array = pixel_array * slope + intercept\n", + " \n", + " # Apply windowing\n", + " window_min = window_center - (window_width / 2)\n", + " window_max = window_center + (window_width / 2)\n", + " windowed_array = np.clip(hu_array, window_min, window_max)\n", + " \n", + " # Normalize the windowed array to [0, 1]\n", + " normalized_array = (windowed_array - window_min) / (window_max - window_min)\n", + " \n", + " return normalized_array\n", + "\n", + "\n", + "def set_window(img, hu=[-800.,1200.]):\n", + " window = np.array(hu)\n", + " newimg = (img-window[0]) / (window[1]-window[0])\n", + " newimg[newimg < 0] = 0\n", + " newimg[newimg > 1] = 1\n", + " newimg = (newimg * 255).astype('uint8')\n", + " return newimg\n", + "\n", + "\n", + "def zero_center(hu_image):\n", + " hu_image = hu_image - np.mean(hu_image)\n", + " return hu_image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def rename_folders(path):\n", + " for patient in os.listdir(path):\n", + " old_path = os.path.join(path, patient)\n", + " new_name = re.sub(r'[^0-9]','',patient)\n", + " new_path = os.path.join(path, new_name)\n", + " os.rename(old_path, new_name)\n", + " \n", + "rename_folders(CT_PATH)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "DATA_PATH = './data/'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import shutil\n", + "def create_ds_structure(path):\n", + " for patient in os.listdir(path):\n", + " p_path = os.path.join(path, patient)\n", + " dicom_path = os.path.join(p_path, 'dicom')\n", + " r_path = os.path.join(p_path, 'r')\n", + " if not os.path.exists(dicom_path):\n", + " os.mkdir(dicom_path)\n", + " if not os.path.exists(r_path):\n", + " os.mkdir(r_path)\n", + " for file in os.listdir(p_path):\n", + " file_path = os.path.join(p_path, file)\n", + " if os.path.isfile(file_path):\n", + " if(file[0:2]=='CT'):\n", + " new_path = os.path.join(dicom_path, file)\n", + " shutil.move(file_path, new_path)\n", + " if(file[0:2]=='RS'):\n", + " new_path = os.path.join(r_path, file)\n", + " shutil.move(file_path, new_path)\n", + "\n", + " \n", + "\n", + "create_ds_structure(DATA_PATH) \n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "\n", + "# Load existing RT Struct. Requires the series path and existing RT Struct path\n", + "rtstruct = RTStructBuilder.create_from(\n", + " dicom_series_path=\"./data/01/dicom\", \n", + " rt_struct_path=\"./data/01/r/RS.1.2.410.200018.1001.1.3.89143765.2.20151123111228992.dcm\"\n", + ")\n", + "\n", + "# View all of the ROI names from within the image\n", + "print(rtstruct.get_roi_names())\n", + "ds = []\n", + "for image in os.listdir('./data/01/dicom/'):\n", + " img_path = os.path.join('./data/01/dicom/', image)\n", + " ds.append(pydicom.dcmread(img_path))\n", + "\n", + "\n", + "\n", + "rtstruct" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "img_idx = 33\n", + "mask_idx = 44 -img_idx\n", + "ds_img = apply_windowing(ds[img_idx], 35, 350)\n", + "# Loading the 3D Mask from within the RT Struct\n", + "mask_3d = rtstruct.get_roi_mask_by_name(\"heart\")\n", + "\n", + "\n", + "fliped = np.flip(mask_3d, axis=2)\n", + "# Display one slice of the region\n", + "first_mask_slice = fliped[:, :, img_idx]\n", + "plt.figure(figsize = (5,5))\n", + "plt.imshow(first_mask_slice)\n", + "#plt.imshow(ds_img, cmap='gray',alpha=0.9)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "IMG_PATH = './2d_data/images/'\n", + "MASKS_PATH = './2d_data/masks/'\n", + "def create_2d_dataset(path, img_path_save, mask_path_save):\n", + " for patient in os.listdir(path):\n", + " p_path = os.path.join(path, patient)\n", + "\n", + " dicom_path = os.path.join(p_path, 'dicom')\n", + " \n", + " r_path = os.path.join(p_path, 'r')\n", + "\n", + " r_file = os.listdir(r_path)[0]\n", + " r_file = os.path.join(r_path, r_file)\n", + "\n", + "\n", + " try: \n", + " rtstruct = RTStructBuilder.create_from(\n", + " dicom_series_path=dicom_path, \n", + " rt_struct_path=r_file\n", + " ) \n", + " mask_3d = rtstruct.get_roi_mask_by_name(\"heart\")\n", + " fliped = np.flip(mask_3d, axis=2)\n", + " idx = 0\n", + " for dcm_file in os.listdir(dicom_path):\n", + " dcm_file_path = os.path.join(dicom_path, dcm_file)\n", + " new_dcm_file = os.path.join(img_path_save,(patient+'_'+dcm_file))\n", + "\n", + " mask_filename = new_dcm_file.replace('.dcm', '_mask.png')\n", + " mask_filename = mask_filename.replace('images', 'masks')\n", + " #print(mask_filename)\n", + " mask_tosave = Image.fromarray(fliped[:,:,idx])\n", + " if not os.path.exists(new_dcm_file):\n", + " shutil.copy(dcm_file_path, new_dcm_file)\n", + " if not os.path.exists(mask_filename):\n", + " mask_tosave.save(mask_filename)\n", + "\n", + " idx+=1\n", + " except: \n", + " print(f'Skipping patient {patient}')\n", + " \n", + " \n", + "\n", + "create_2d_dataset(DATA_PATH, IMG_PATH, MASKS_PATH)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "DL", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +}