import random
#from skimage import io
import numpy as np
from glob import glob
import SimpleITK as sitk
from keras.utils import np_utils
class Pipeline(object):
def __init__(self, list_train ,Normalize=True):
self.scans_train = list_train
self.train_im=self.read_scans(Normalize)
def read_scans(self,Normalize):
train_im=[]
for i in range(len( self.scans_train)):
if i%10==0:
print('iteration [{}]'.format(i))
flair = glob( self.scans_train[i] + '/*_flair.nii.gz')
t2 = glob( self.scans_train[i] + '/*_t2.nii.gz')
gt = glob( self.scans_train[i] + '/*_seg.nii.gz')
t1 = glob( self.scans_train[i] + '/*_t1.nii.gz')
t1c = glob( self.scans_train[i] + '/*_t1ce.nii.gz')
t1s=[scan for scan in t1 if scan not in t1c]
if (len(flair)+len(t2)+len(gt)+len(t1s)+len(t1c))<5:
print("there is a problem here!!! the problem lies in this patient :", self.scans_train[i])
continue
scans = [flair[0], t1s[0], t1c[0], t2[0], gt[0]]
#read a volume composed of 4 modalities
tmp = [sitk.GetArrayFromImage(sitk.ReadImage(scans[k])) for k in range(len(scans))]
#crop each volume to have a size of (146,192,152) to discard some unwanted background and thus save some computational power ;)
z0=1
y0=29
x0=42
z1=147
y1=221
x1=194
tmp=np.array(tmp)
tmp=tmp[:,z0:z1,y0:y1,x0:x1]
#normalize each slice
if Normalize==True:
tmp=self.norm_slices(tmp)
train_im.append(tmp)
del tmp
return np.array(train_im)
def sample_patches_randomly(self, num_patches, d , h , w ):
'''
INPUT:
num_patches : the total number of samled patches
d : this correspnds to the number of channels which is ,in our case, 4 MRI modalities
h : height of the patch
w : width of the patch
OUTPUT:
patches : np array containing the randomly sampled patches
labels : np array containing the corresping target patches
'''
patches, labels = [], []
count = 0
#swap axes to make axis 0 represents the modality and axis 1 represents the slice. take the ground truth
gt_im = np.swapaxes(self.train_im, 0, 1)[4]
#take flair image as mask
msk = np.swapaxes(self.train_im, 0, 1)[0]
#save the shape of the grounf truth to use it afterwards
tmp_shp = gt_im.shape
#reshape the mask and the ground truth to 1D array
gt_im = gt_im.reshape(-1).astype(np.uint8)
msk = msk.reshape(-1).astype(np.float32)
# maintain list of 1D indices while discarding 0 intensities
indices = np.squeeze(np.argwhere((msk!=-9.0) & (msk!=0.0)))
del msk
# shuffle the list of indices of the class
np.random.shuffle(indices)
#reshape gt_im
gt_im = gt_im.reshape(tmp_shp)
#a loop to sample the patches from the images
i = 0
pix = len(indices)
while (count<num_patches) and (pix>i):
#randomly choose an index
ind = indices[i]
i+= 1
#reshape ind to 3D index
ind = np.unravel_index(ind, tmp_shp)
# get the patient and the slice id
patient_id = ind[0]
slice_idx=ind[1]
p = ind[2:]
#construct the patch by defining the coordinates
p_y = (p[0] - (h)/2, p[0] + (h)/2)
p_x = (p[1] - (w)/2, p[1] + (w)/2)
p_x=list(map(int,p_x))
p_y=list(map(int,p_y))
#take patches from all modalities and group them together
tmp = self.train_im[patient_id][0:4, slice_idx,p_y[0]:p_y[1], p_x[0]:p_x[1]]
#take the coresponding label patch
lbl=gt_im[patient_id,slice_idx,p_y[0]:p_y[1], p_x[0]:p_x[1]]
#keep only paches that have the desired size
if tmp.shape != (d, h, w) :
continue
patches.append(tmp)
labels.append(lbl)
count+=1
patches = np.array(patches)
labels=np.array(labels)
return patches, labels
def norm_slices(self,slice_not):
'''
normalizes each slice , excluding gt
subtracts mean and div by std dev for each slice
clips top and bottom one percent of pixel intensities
'''
normed_slices = np.zeros(( 5,146, 192, 152)).astype(np.float32)
for slice_ix in range(4):
normed_slices[slice_ix] = slice_not[slice_ix]
for mode_ix in range(146):
normed_slices[slice_ix][mode_ix] = self._normalize(slice_not[slice_ix][mode_ix])
normed_slices[-1]=slice_not[-1]
return normed_slices
def _normalize(self,slice):
'''
input: unnormalized slice
OUTPUT: normalized clipped slice
'''
b = np.percentile(slice, 99)
t = np.percentile(slice, 1)
slice = np.clip(slice, t, b)
image_nonzero = slice[np.nonzero(slice)]
if np.std(slice)==0 or np.std(image_nonzero) == 0:
return slice
else:
tmp= (slice - np.mean(image_nonzero)) / np.std(image_nonzero)
#since the range of intensities is between 0 and 5000 ,the min in the normalized slice corresponds to 0 intensity in unnormalized slice
#the min is replaced with -9 just to keep track of 0 intensities so that we can discard those intensities afterwards when sampling random patches
tmp[tmp==tmp.min()]=-9
return tmp
'''
def save_image_png (img,output_file="img.png"):
"""
save 2d image to disk in a png format
"""
img=np.array(img).astype(np.float32)
if np.max(img) != 0:
img /= np.max(img) # set values < 1
if np.min(img) <= -1: # set values > -1
img /= abs(np.min(img))
io.imsave(output_file, img)
'''
def concatenate ():
'''
concatenate two parts into one dataset
this can be avoided if there is enough RAM as we can directly from the whole dataset
'''
Y_labels_2=np.load("y_dataset_second_part.npy").astype(np.uint8)
X_patches_2=np.load("x_dataset_second_part.npy").astype(np.float32)
Y_labels_1=np.load("y_dataset_first_part.npy").astype(np.uint8)
X_patches_1=np.load("x_dataset_first_part.npy").astype(np.float32)
#concatenate both parts
X_patches=np.concatenate((X_patches_1, X_patches_2), axis=0)
Y_labels=np.concatenate((Y_labels_1, Y_labels_2), axis=0)
del Y_labels_2,X_patches_2,Y_labels_1,X_patches_1
#shuffle the whole dataset
shuffle = list(zip(X_patches, Y_labels))
np.random.seed(138)
np.random.shuffle(shuffle)
X_patches = np.array([shuffle[i][0] for i in range(len(shuffle))])
Y_labels = np.array([shuffle[i][1] for i in range(len(shuffle))])
del shuffle
np.save( "x_training",X_patches.astype(np.float32) )
np.save( "y_training",Y_labels.astype(np.uint8))
#np.save( "x_valid",X_patches_valid.astype(np.float32) )
#np.save( "y_valid",Y_labels_valid.astype(np.uint8))
if __name__ == '__main__':
#Paths for Brats2017 dataset
path_HGG = glob('Brats2017/Brats17TrainingData/HGG/**')
path_LGG = glob('Brats2017/Brats17TrainingData/LGG/**')
path_all=path_HGG+path_LGG
#shuffle the dataset
np.random.seed(2022)
np.random.shuffle(path_all)
np.random.seed(1555)
start=0
end=20
#set the total number of patches
#this formula extracts approximately 3 patches per slice
num_patches=146*(end-start)*3
#define the size of a patch
h=128
w=128
d=4
pipe=Pipeline(list_train=path_all[start:end],Normalize=True)
Patches,Y_labels=pipe.sample_patches_randomly(num_patches,d, h, w)
#transform the data to channels_last keras format
Patches=np.transpose(Patches,(0,2,3,1)).astype(np.float32)
# since the brats2017 dataset has only 4 labels,namely 0,1,2 and 4 as opposed to previous datasets
# this transormation is done so that we will have 4 classes when we one-hot encode the targets
Y_labels[Y_labels==4]=3
#transform y to one_hot enconding for keras
shp=Y_labels.shape[0]
Y_labels=Y_labels.reshape(-1)
Y_labels = np_utils.to_categorical(Y_labels).astype(np.uint8)
Y_labels=Y_labels.reshape(shp,h,w,4)
#shuffle the whole dataset
shuffle = list(zip(Patches, Y_labels))
np.random.seed(180)
np.random.shuffle(shuffle)
Patches = np.array([shuffle[i][0] for i in range(len(shuffle))])
Y_labels = np.array([shuffle[i][1] for i in range(len(shuffle))])
del shuffle
print("Size of the patches : ",Patches.shape)
print("Size of their correponding targets : ",Y_labels.shape)
#save to disk as npy files
#np.save( "x_dataset_first_part",Patches )
#np.save( "y_dataset_first_part",Y_labels)