[ec6ff6]: / 3d-ircadb-01_util.ipynb

Download this file

379 lines (378 with data), 14.1 kB

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os, glob\n",
    "import sys\n",
    "import copy \n",
    "import pydicom\n",
    "import scipy\n",
    "import scipy.misc\n",
    "import numpy as np\n",
    "import cv2\n",
    "import imageio\n",
    "from scipy.ndimage import rotate\n",
    "from PIL import Image\n",
    "from zipfile import ZipFile"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Download the 3D-IRCADb-01 dataset from https://www.ircad.fr/research/3d-ircadb-01/ into a directory named `Dataset` in the root of the repo"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Directory that holds the downloaded 3D-IRCADb-01 archives; the extraction cells read from and unpack into it.\n",
    "dataset_path = \"Dataset\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# ONLY RELEVANT IF YOU'VE DOWNLOADED THE ENTIRE DATASET AT ONCE, NOT PATIENT BY PATIENT\n",
    "# The archive is looked up first so that a patient-by-patient download does not\n",
    "# abort a full notebook run with FileNotFoundError.\n",
    "full_archive_path = os.path.join(dataset_path, \"3Dircadb1.zip\")\n",
    "if os.path.isfile(full_archive_path):\n",
    "    # Opening the archive happens inside the try as well, so a corrupt zip is reported instead of raised.\n",
    "    try:\n",
    "        with ZipFile(full_archive_path, 'r') as zipObj:\n",
    "            zipObj.extractall(dataset_path)\n",
    "    except Exception as error:\n",
    "        print(error)\n",
    "else:\n",
    "    print(full_archive_path + ' not found - skipping the full-dataset extraction')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset_path = \"Dataset\" # You can change the path of the directory if you have the dataset elsewhere\n",
    "\n",
    "# STEP 1: UNPACK EVERY .zip ARCHIVE THAT SITS DIRECTLY IN THE DATASET DIRECTORY\n",
    "for entry in os.listdir(dataset_path):\n",
    "    archive_path = os.path.join(dataset_path, entry)\n",
    "    if not archive_path.endswith(\".zip\"):\n",
    "        continue\n",
    "    with ZipFile(archive_path, 'r') as patient_zip:\n",
    "        try:\n",
    "            patient_zip.extractall(dataset_path)\n",
    "        except Exception as error:\n",
    "            # Report the failure and carry on with the next archive\n",
    "            print(error)\n",
    "\n",
    "# STEP 2: INSIDE EACH EXTRACTED DIRECTORY, UNPACK THE ARCHIVES HOLDING THE PATIENT SLICES AND THE MASKS\n",
    "for entry in os.listdir(dataset_path):\n",
    "    patient_dir = os.path.join(dataset_path, entry)\n",
    "    if not os.path.isdir(patient_dir):\n",
    "        continue\n",
    "    # Same order as before: slices first, masks second; a missing archive is simply skipped\n",
    "    for inner_name in (\"PATIENT_DICOM.zip\", \"MASKS_DICOM.zip\"):\n",
    "        inner_archive = os.path.join(patient_dir, inner_name)\n",
    "        if os.path.isfile(inner_archive):\n",
    "            with ZipFile(inner_archive, 'r') as inner_zip:\n",
    "                inner_zip.extractall(patient_dir)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# CHANGE DATASET STRUCTURE\n",
    "# PREFIX FILES IN THE DIRECTORY WITH THE PATIENT'S ID\n",
    "# The ID is the part of the patient directory name that follows 'db'\n",
    "# (a directory named like '3Dircadb1.1' yields '1.1').\n",
    "def _prefix_with_patient_id(parent_dir, name, patient_id):\n",
    "    \"\"\"Rename parent_dir/name to parent_dir/<patient_id>_<name>, unless it already carries that prefix.\"\"\"\n",
    "    prefix = patient_id + '_'\n",
    "    if name.startswith(prefix):\n",
    "        # Already renamed by an earlier run. Prefixing twice would break the\n",
    "        # scan.split('_')[0] patient-ID lookups done by the functions below.\n",
    "        return\n",
    "    os.rename(os.path.join(parent_dir, name), os.path.join(parent_dir, prefix + name))\n",
    "\n",
    "\n",
    "for folder in os.listdir(dataset_path):\n",
    "    folder_path = os.path.join(dataset_path, folder)\n",
    "    # Skip plain files and any directory whose name has no 'db' part (no patient ID to extract)\n",
    "    if not os.path.isdir(folder_path) or 'db' not in folder:\n",
    "        continue\n",
    "    patient_id = folder.split('db')[1]\n",
    "    for subfolder in os.listdir(folder_path):\n",
    "        subfolder_path = os.path.join(folder_path, subfolder)\n",
    "        if not os.path.isdir(subfolder_path):\n",
    "            continue\n",
    "        for sub_sub in os.listdir(subfolder_path):\n",
    "            sub_sub_path = os.path.join(subfolder_path, sub_sub)\n",
    "            if os.path.isdir(sub_sub_path):\n",
    "                # Rename the contents first, while the directory still has its old name\n",
    "                for image in os.listdir(sub_sub_path):\n",
    "                    _prefix_with_patient_id(sub_sub_path, image, patient_id)\n",
    "            # Applies to files and directories alike\n",
    "            _prefix_with_patient_id(subfolder_path, sub_sub, patient_id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# MOVE EVERY PATIENT'S SCANS AND MASKS INTO train/patients AND train/masks\n",
    "directory_to_move = \"PATIENT_DICOM\"\n",
    "desired_scans_path = os.path.join(\"train\", \"patients\")\n",
    "desired_masks_path = os.path.join(\"train\", \"masks\")\n",
    "\n",
    "# makedirs(exist_ok=True) also creates the parent 'train' directory and is a no-op on re-runs\n",
    "for needed_dir in (desired_scans_path,\n",
    "                   desired_masks_path,\n",
    "                   os.path.join(desired_masks_path, \"merged_livertumors\")):\n",
    "    os.makedirs(needed_dir, exist_ok=True)\n",
    "\n",
    "\n",
    "def _move_patient_entries(subdir_name, destination):\n",
    "    \"\"\"Move every entry of <dataset_path>/<patient>/<subdir_name> into destination.\"\"\"\n",
    "    for patient_path in os.listdir(dataset_path):\n",
    "        source_dir = os.path.join(dataset_path, patient_path, subdir_name)\n",
    "        # False for plain files and for patients that lack this sub-directory\n",
    "        if not os.path.isdir(source_dir):\n",
    "            continue\n",
    "        for entry in os.listdir(source_dir):\n",
    "            # os.replace is used for scans and masks alike (the scans used os.rename before)\n",
    "            os.replace(os.path.join(source_dir, entry), os.path.join(destination, entry))\n",
    "\n",
    "\n",
    "_move_patient_entries(directory_to_move, desired_scans_path)\n",
    "_move_patient_entries(\"MASKS_DICOM\", desired_masks_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# COUNT THE NUMBER OF SCANS FOR 1 PATIENT TO APPEND ON THEM\n",
    "def count_scans_startwith(directory, prefix):\n",
    "    \"\"\"Return how many entries of `directory` have a name starting with `prefix` followed by '_'.\"\"\"\n",
    "    wanted_start = prefix + '_'\n",
    "    return sum(1 for entry_name in os.listdir(directory) if entry_name.startswith(wanted_start))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# MERGE ALL LIVERTUMORS IN 1 MASK\n",
    "def merge_livertumors(scans_path, masks_path):\n",
    "    \"\"\"Write one merged tumor mask per scan to <masks_path>/merged_livertumors/<scan>.jpg.\n",
    "\n",
    "    For every entry `scan` of `scans_path`, the DICOM file named `scan` is read from each\n",
    "    directory of `masks_path` whose name starts with '<patient_id>_livertumor' (patient_id is\n",
    "    the part of the scan name before the first '_'). The masks are divided by 255, clipped to\n",
    "    [0, 1], combined with a logical OR, scaled back by 255 and saved as an 8-bit JPEG.\n",
    "    A scan without any matching tumor directory gets an all-zero 512x512 mask.\n",
    "\n",
    "    Args:\n",
    "        scans_path: directory whose entry names identify the scans.\n",
    "        masks_path: directory holding the per-patient tumor mask directories; the output\n",
    "            sub-directory 'merged_livertumors' is created inside it if missing.\n",
    "    \"\"\"\n",
    "    output_dir = os.path.join(masks_path, 'merged_livertumors')\n",
    "    os.makedirs(output_dir, exist_ok=True)\n",
    "    # Files are only added inside output_dir, so the listing of masks_path can be read once\n",
    "    mask_dirs = os.listdir(masks_path)\n",
    "\n",
    "    for scan in os.listdir(scans_path):\n",
    "        patient_id = scan.split('_')[0]\n",
    "        tumor_volume = None\n",
    "        for mask_dir in mask_dirs:\n",
    "            if mask_dir.startswith(patient_id+'_livertumor'):\n",
    "                current_tumor = pydicom.dcmread(os.path.join(masks_path, mask_dir, scan)).pixel_array\n",
    "                current_tumor = np.clip(current_tumor/255.0, 0, 1)\n",
    "                tumor_volume = current_tumor if tumor_volume is None else np.logical_or(tumor_volume, current_tumor)\n",
    "\n",
    "        if tumor_volume is None:\n",
    "            # NOTE(review): the fallback size is hard-coded; presumably every scan is 512x512 - confirm\n",
    "            tumor_volume = np.zeros((512,512))\n",
    "\n",
    "        # Vectorised scaling; gives the same values as multiplying pixel by pixel in Python\n",
    "        tumor_volume = (np.asarray(tumor_volume) * 255).astype(np.uint8)\n",
    "        Image.fromarray(tumor_volume).save(os.path.join(output_dir, scan+'.jpg'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# REFLECT IMAGE AND MASK TO AUGMENT DATA\n",
    "def reflect_dicom(src_img, src_mask, src_liver_mask, train_path, masks_path, liver_mask_path, patient_id, patient_imgs_count):\n",
    "    \"\"\"Save left-right mirrored copies of a scan, its liver mask and its tumor mask.\n",
    "\n",
    "    `src_img` and `src_liver_mask` are deep-copied before their pixel data is replaced, so the\n",
    "    objects passed in are not modified. `src_mask` is an array; its mirrored copy is written\n",
    "    as a JPEG into <masks_path>/merged_livertumors. All three outputs share the base name\n",
    "    '<patient_id>_image_<patient_imgs_count>_augref'.\n",
    "    \"\"\"\n",
    "    out_name = patient_id+'_image_'+str(patient_imgs_count)+'_augref'\n",
    "\n",
    "    # Scan: prepared first, written after the liver mask (same order as before)\n",
    "    mirrored_scan = copy.deepcopy(src_img)\n",
    "    mirrored_scan.PixelData = np.fliplr(mirrored_scan.pixel_array).tobytes()\n",
    "\n",
    "    # Liver mask\n",
    "    mirrored_liver = copy.deepcopy(src_liver_mask)\n",
    "    mirrored_liver.PixelData = np.fliplr(mirrored_liver.pixel_array).tobytes()\n",
    "    mirrored_liver.save_as(os.path.join(liver_mask_path, out_name))\n",
    "\n",
    "    # Tumor mask\n",
    "    mirrored_tumor = np.fliplr(src_mask)\n",
    "\n",
    "    mirrored_scan.save_as(os.path.join(train_path, out_name))\n",
    "\n",
    "    mirrored_tumor = mirrored_tumor.astype(np.uint8)\n",
    "    imageio.imwrite(os.path.join(masks_path, 'merged_livertumors', out_name+'.jpg'), mirrored_tumor)\n",
    "\n",
    "\n",
    "# ROTATE IMAGE AND MASK TO AUGMENT DATA\n",
    "def rotate_dicom(src_img, src_mask, src_liver_mask, train_path, masks_path, liver_mask_path, patient_id, patient_imgs_count, angle):\n",
    "    \"\"\"Save copies of a scan, its liver mask and its tumor mask rotated by `angle`.\n",
    "\n",
    "    The rotation is done by scipy.ndimage.rotate with reshape=False. The objects passed in\n",
    "    are not modified (deep copies are edited). All three outputs share the base name\n",
    "    '<patient_id>_image_<patient_imgs_count>_augrot'; the tumor mask is written as a JPEG\n",
    "    into <masks_path>/merged_livertumors.\n",
    "    \"\"\"\n",
    "    out_name = patient_id+'_image_'+str(patient_imgs_count)+'_augrot'\n",
    "\n",
    "    # Scan: prepared first, written after the liver mask (same order as before)\n",
    "    turned_scan = copy.deepcopy(src_img)\n",
    "    turned_scan.PixelData = rotate(turned_scan.pixel_array, angle, reshape=False).tobytes()\n",
    "\n",
    "    # Liver mask\n",
    "    turned_liver = copy.deepcopy(src_liver_mask)\n",
    "    turned_liver.PixelData = rotate(turned_liver.pixel_array, angle, reshape=False).tobytes()\n",
    "    turned_liver.save_as(os.path.join(liver_mask_path, out_name))\n",
    "\n",
    "    # Tumor mask\n",
    "    turned_tumor = rotate(src_mask, angle, reshape=False)\n",
    "\n",
    "    turned_scan.save_as(os.path.join(train_path, out_name))\n",
    "\n",
    "    turned_tumor = turned_tumor.astype(np.uint8)\n",
    "    imageio.imwrite(os.path.join(masks_path, 'merged_livertumors', out_name+'.jpg'), turned_tumor)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# AUGMENT THE MASKS WITH TUMORS TO FIX CLASS IMBALANCING\n",
    "def augment_dicom(train_path, masks_path):\n",
    "    \"\"\"Augment every scan whose merged tumor mask is non-empty.\n",
    "\n",
    "    For each such scan one mirrored copy and 35 rotated copies (10 to 350 degrees in steps\n",
    "    of 10) are written via reflect_dicom / rotate_dicom, together with the matching liver\n",
    "    mask and tumor mask.\n",
    "\n",
    "    Args:\n",
    "        train_path: directory with the DICOM scans; augmented scans are written here too.\n",
    "        masks_path: directory containing 'merged_livertumors' (one '<scan>.jpg' per scan)\n",
    "            and one '<patient_id>_liver' directory per patient.\n",
    "    \"\"\"\n",
    "    rotation_angles = range(10, 360, 10)\n",
    "    augmented_suffixes = ('_augref', '_augrot')\n",
    "    # os.listdir already returns a fresh list, so files added while looping are not visited\n",
    "    train_files = os.listdir(train_path)\n",
    "\n",
    "    for scan in train_files:\n",
    "        if scan.endswith(augmented_suffixes):\n",
    "            # Output of an earlier run: augmenting it again would compound the augmentation\n",
    "            continue\n",
    "\n",
    "        mask_path = os.path.join(masks_path, 'merged_livertumors', scan+'.jpg')\n",
    "        tumor_mask = cv2.imread(mask_path)\n",
    "        if tumor_mask is None:\n",
    "            # cv2.imread returns None instead of raising when the file is missing or unreadable\n",
    "            continue\n",
    "        # merge_livertumors stores masks scaled by 255 and JPEG-compressed, so look for\n",
    "        # any non-zero pixel rather than for the exact value 1\n",
    "        if not tumor_mask.any():\n",
    "            continue\n",
    "\n",
    "        patient_id = scan.split('_')[0]\n",
    "        patient_imgs_count = count_scans_startwith(train_path, patient_id)\n",
    "        original_img = pydicom.dcmread(os.path.join(train_path, scan))\n",
    "        liver_mask_path = os.path.join(masks_path, patient_id+'_liver')\n",
    "        liver_mask = pydicom.dcmread(os.path.join(liver_mask_path, scan))\n",
    "        reflect_dicom(original_img, tumor_mask, liver_mask, train_path, masks_path, liver_mask_path, patient_id, patient_imgs_count)\n",
    "\n",
    "        for angle in rotation_angles:\n",
    "            # Each rotated copy gets the next running number after the mirrored one\n",
    "            patient_imgs_count += 1\n",
    "            rotate_dicom(original_img, tumor_mask, liver_mask, train_path, masks_path, liver_mask_path, patient_id, patient_imgs_count, angle)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "done\n"
     ]
    }
   ],
   "source": [
    "masks_path = os.path.join('train','masks')\n",
    "training_path = os.path.join('train','patients')\n",
    "\n",
    "# augment_dicom reads the merged tumor masks, and merge_livertumors can only build them from\n",
    "# un-augmented scans (augmented scans have no file in the livertumor directories).\n",
    "# So merge first, but only while no merged mask has been written yet.\n",
    "merged_masks_path = os.path.join(masks_path, 'merged_livertumors')\n",
    "os.makedirs(merged_masks_path, exist_ok=True)\n",
    "if not os.listdir(merged_masks_path):\n",
    "    merge_livertumors(training_path, masks_path)\n",
    "\n",
    "augment_dicom(training_path, masks_path)\n",
    "print('done')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "20974\n"
     ]
    }
   ],
   "source": [
    "# COUNT THE MERGED MASKS THAT CONTAIN A TUMOR\n",
    "merged_masks_dir = os.path.join(masks_path,'merged_livertumors')\n",
    "count = 0\n",
    "for mask in os.listdir(merged_masks_dir):\n",
    "    tumor_mask = cv2.imread(os.path.join(merged_masks_dir, mask))\n",
    "    # cv2.imread returns None for unreadable files. The masks are stored scaled by 255 and\n",
    "    # JPEG-compressed, so test for any non-zero pixel rather than for the exact value 1.\n",
    "    if tumor_mask is not None and tumor_mask.any():\n",
    "        count += 1\n",
    "print(count)\n",
    "# 568  (earlier recorded count - presumably taken before augmentation; TODO confirm)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "done\n"
     ]
    }
   ],
   "source": [
    "# REMOVE AUGMENTED DATA\n",
    "# Name endings that mark a file as augmented output, per location\n",
    "scan_endings = ('_augref', '_augrot')\n",
    "liver_endings = ('augref', 'augrot', 'aug')\n",
    "merged_endings = ('_augref.jpg', 'augrot.jpg')\n",
    "\n",
    "# 1) augmented scans\n",
    "for scan in os.listdir(training_path):\n",
    "    if scan.endswith(scan_endings):\n",
    "        os.remove(os.path.join(training_path,scan))\n",
    "\n",
    "# 2) augmented liver masks, inside every mask directory whose name ends with 'liver'\n",
    "for mask_dir in os.listdir(masks_path):\n",
    "    if not mask_dir.endswith('liver'):\n",
    "        continue\n",
    "    mask_dir_path = os.path.join(masks_path, mask_dir)\n",
    "    for liver_mask in os.listdir(mask_dir_path):\n",
    "        if liver_mask.endswith(liver_endings):\n",
    "            os.remove(os.path.join(mask_dir_path, liver_mask))\n",
    "\n",
    "# 3) augmented merged tumor masks\n",
    "merged_dir = os.path.join(masks_path, 'merged_livertumors')\n",
    "for mask in os.listdir(merged_dir):\n",
    "    if mask.endswith(merged_endings):\n",
    "        os.remove(os.path.join(merged_dir, mask))\n",
    "print('done')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}