In [1]:
import os, glob
import sys
import copy 
import pydicom
import scipy
import scipy.misc
import numpy as np
import cv2
import imageio
from scipy.ndimage import rotate
from PIL import Image
from zipfile import ZipFile

### Download the 3D-IRCADb-01 dataset from https://www.ircad.fr/research/3d-ircadb-01/ and into a directory in the root of the repo and name it Dataset

In [None]:
dataset_path = "Dataset"

In [None]:
#ONLY DO THIS STEP IF YOU'VE DOWNLOADED THE ENTIRE DATASET AT ONCE NOT PATIENT BY PATIENT
with ZipFile(os.path.join(dataset_path,"3Dircadb1.zip"), 'r') as zipObj:
        try:
            zipObj.extractall(dataset_path)
        except Exception as error:
            print(error)

In [None]:
dataset_path = "Dataset" # You can change the path of the directory if you have the dataset elsewhere
# EXTRACT EVERY PATIENT'S .zip FILE

for file in os.listdir(dataset_path):
    file_path = os.path.join(dataset_path, file)
    if file_path.endswith(".zip"):
        with ZipFile(file_path, 'r') as zipObj:
            try:
                zipObj.extractall(dataset_path)
            except Exception as error:
                print(error)
    

# GO INTO THE JUST EXTRACTED DIRECTORIES AND EXTRACT THE .zip FILES THAT CONTAIN THE PATIENT SLICES AND MASKS

for dir_path in os.listdir(dataset_path):
    dir_full_path = os.path.join(dataset_path, dir_path)
    if os.path.isdir(dir_full_path):
        if os.path.isfile(os.path.join(dir_full_path,"PATIENT_DICOM.zip")):
            with ZipFile(os.path.join(dir_full_path,"PATIENT_DICOM.zip"), 'r') as zipObj:
                zipObj.extractall(dir_full_path)  

        if os.path.isfile(os.path.join(dir_full_path,"MASKS_DICOM.zip")):
            with ZipFile(os.path.join(dir_full_path,"MASKS_DICOM.zip"), 'r') as zipObj:
                zipObj.extractall(dir_full_path) 

In [None]:
# CHANGE DATASET STRUCTURE
# PREFIX FILES IN THE DIRECTORY WITH THE PATIENT'S ID
for folder in os.listdir("Dataset"):
    x = folder.split('db')
    folder_path = os.path.join('Dataset', folder)
    if os.path.isdir(folder_path):
        for subfolder in os.listdir(folder_path):
            subfolder_path = os.path.join(folder_path,subfolder)
            if os.path.isdir(subfolder_path):
                for sub_sub in os.listdir(subfolder_path):
                    if os.path.isdir(os.path.join(subfolder_path, sub_sub)):
                        for image in os.listdir(os.path.join(subfolder_path, sub_sub)):
                            src= os.path.join(subfolder_path, sub_sub, image)
                            dst = os.path.join(subfolder_path, sub_sub, x[1]+'_'+image)
                            os.rename(src, dst)
                            
                    src= os.path.join(subfolder_path, sub_sub)
                    dst = os.path.join(subfolder_path, x[1]+'_'+sub_sub)
                    os.rename(src, dst)

In [None]:
directory_to_move = "PATIENT_DICOM"
desired_scans_path = os.path.join("train", "patients")
desired_masks_path = os.path.join("train", "masks")
try:  
    os.mkdir("train")  
except OSError as error:  
    print(error)

try:  
    os.mkdir(desired_scans_path)  
except OSError as error:  
    print(error)

for patient_path in os.listdir(dataset_path):
    patient_full_path = os.path.join(dataset_path, patient_path)
    if os.path.isdir(patient_full_path):
        patient_scans_path = os.path.join(patient_full_path, "PATIENT_DICOM")
        if os.path.isdir(patient_scans_path):
            for scan in os.listdir(patient_scans_path):
                os.rename(os.path.join(patient_scans_path, scan), os.path.join(desired_scans_path, scan))



try:  
    os.mkdir(desired_masks_path)  
except OSError as error:  
    print(error)

for patient_path in os.listdir(dataset_path):
    patient_full_path = os.path.join(dataset_path, patient_path)
    if(os.path.isdir(patient_full_path)):
        patient_masks_path = os.path.join(patient_full_path, "MASKS_DICOM")
        if os.path.isdir(patient_masks_path):
            for scan in os.listdir(patient_masks_path):
                os.replace(os.path.join(patient_masks_path, scan), os.path.join(desired_masks_path, scan))

try:  
    os.mkdir(os.path.join(desired_masks_path, "merged_livertumors"))  
except OSError as error:  
    print(error)

In [3]:
# COUNT THE NUMBER OF SCANS FOR 1 PATIENT TO APPEND ON THEM
def count_scans_startwith(directory, prefix):
    count = 0
    for file in os.listdir(directory):
        if file.startswith(prefix+'_'):
            count+=1
    return count

In [4]:
# MERGE ALL LIVERTUMORS IN 1 MASK
def merge_livertumors(scans_path, masks_path):
    for scan in os.listdir(scans_path):
        patient_id = scan.split('_')[0]
        tumor_volume = None
        for mask_dir in os.listdir(masks_path):
            if mask_dir.startswith(patient_id+'_livertumor'):
                current_tumor = pydicom.dcmread(os.path.join(masks_path, mask_dir, scan)).pixel_array
                current_tumor = current_tumor/255.0
                current_tumor = np.clip(current_tumor,0,1)
                tumor_volume = current_tumor if tumor_volume is None else np.logical_or(tumor_volume,current_tumor)
    
        if tumor_volume is None:
            tumor_volume = np.zeros((512,512))
        tumor_volume = [[j*255 for j in i] for i in tumor_volume]
        
    
        tumor_volume = np.array(tumor_volume)
        tumor_volume = tumor_volume.astype(np.uint8)
        im = Image.fromarray(tumor_volume)
        im.save(os.path.join(masks_path, 'merged_livertumors', scan+'.jpg'))
#         imageio.imwrite(os.path.join(masks_path, 'merged_livertumors', scan+'.jpg'), tumor_volume)

In [5]:
# REFLECT IMAGE AND MASK TO AUGMENT DATA
def reflect_dicom(src_img, src_mask, src_liver_mask, train_path, masks_path, liver_mask_path, patient_id, patient_imgs_count):
    reflected_img = copy.deepcopy(src_img)
    reflected_img.PixelData = np.fliplr(reflected_img.pixel_array).tobytes()
    

    reflected_liver_mask = copy.deepcopy(src_liver_mask)
    reflected_liver_mask.PixelData = np.fliplr(reflected_liver_mask.pixel_array).tobytes()
    reflected_liver_mask.save_as(os.path.join(liver_mask_path, patient_id+'_image_'+str(patient_imgs_count)+'_augref'))
            
    reflected_mask = np.fliplr(src_mask)
    
   
    reflected_img.save_as(os.path.join(train_path, patient_id+'_image_'+str(patient_imgs_count)+'_augref'))

    reflected_mask = reflected_mask.astype(np.uint8)
    imageio.imwrite(os.path.join(masks_path, 'merged_livertumors', patient_id+'_image_'+str(patient_imgs_count)+'_augref.jpg'), reflected_mask)
                    
    
# ROTATE IMAGE AND MASK TO AUGMENT DATA
def rotate_dicom(src_img, src_mask, src_liver_mask, train_path, masks_path, liver_mask_path, patient_id, patient_imgs_count, angle):
    rotated_img = copy.deepcopy(src_img)
    rotated_img.PixelData = rotate(rotated_img.pixel_array, angle, reshape=False).tobytes()
    

    rotated_liver_mask = copy.deepcopy(src_liver_mask)
    rotated_liver_mask.PixelData = rotate(rotated_liver_mask.pixel_array, angle, reshape=False).tobytes()
    rotated_liver_mask.save_as(os.path.join(liver_mask_path, patient_id+'_image_'+str(patient_imgs_count)+'_augrot'))
    
    rotated_mask = rotate(src_mask, angle, reshape=False)
    

    rotated_img.save_as(os.path.join(train_path, patient_id+'_image_'+str(patient_imgs_count)+'_augrot'))
    
    rotated_mask = rotated_mask.astype(np.uint8)
    imageio.imwrite(os.path.join(masks_path, 'merged_livertumors', patient_id+'_image_'+str(patient_imgs_count)+'_augrot.jpg'), rotated_mask)    
    

In [6]:
# AUGMENT THE MASKS WITH TUMORS TO FIX CLASS IMBALANCING
def augment_dicom(train_path, masks_path):
    rotation_angles = [x * 10 for x in range(36) if x > 0]
    train_files = copy.deepcopy(os.listdir(train_path))
    
    for scan in train_files:
        mask_path = os.path.join(masks_path, 'merged_livertumors', scan+'.jpg')
        tumor_mask = cv2.imread(mask_path)
        if 1 in tumor_mask:
            patient_id = scan.split('_')[0]
            patient_imgs_count = count_scans_startwith(train_path, patient_id)
            original_img = pydicom.dcmread(os.path.join(train_path, scan)) 
            liver_mask_path = os.path.join(masks_path, patient_id+'_liver')
            liver_mask = pydicom.dcmread(os.path.join(liver_mask_path, scan))
            reflect_dicom(original_img, tumor_mask, liver_mask, train_path, masks_path, liver_mask_path, patient_id, patient_imgs_count)
            
            for angle in rotation_angles:
                patient_imgs_count += 1 
                rotate_dicom(original_img, tumor_mask, liver_mask, train_path, masks_path, liver_mask_path, patient_id, patient_imgs_count, angle)
        

In [7]:
masks_path = os.path.join('train','masks')
training_path = os.path.join('train','patients')

augment_dicom(training_path, masks_path)
# merge_livertumors(training_path, masks_path)
print('done')

done


In [8]:
count = 0
for mask in os.listdir(os.path.join(masks_path,'merged_livertumors')):
    tumor_mask = cv2.imread(os.path.join(masks_path,'merged_livertumors', mask))
    if 1 in tumor_mask:
        count += 1
print(count)
# 568

20974


In [4]:
# REMOVE AUGMENTED DATA
for scan in os.listdir(training_path):
    if scan.endswith('_augref') or scan.endswith('_augrot'):
        os.remove(os.path.join(training_path,scan))

for mask_dir in os.listdir(masks_path):
    if mask_dir.endswith('liver'):
        mask_dir_path = os.path.join(masks_path, mask_dir)
        for liver_mask in os.listdir(mask_dir_path):
            if liver_mask.endswith('augref') or liver_mask.endswith('augrot') or liver_mask.endswith('aug'):
                os.remove(os.path.join(mask_dir_path, liver_mask))

for mask in os.listdir(os.path.join(masks_path, 'merged_livertumors')):
    if mask.endswith('_augref.jpg') or mask.endswith('augrot.jpg'):
        os.remove(os.path.join(masks_path, 'merged_livertumors', mask))
print('done')

done
