Brain-Tumor-Segm-2D / Git / [7b3b92] /prepare

Models:
SCallahan/
Brain-Tumor-Segm-2D
Downloads: 1
[7b3b92]: / prepare_data.py
History
Download this file
167 lines (129 with data), 6.9 kB

import os
import tables
import numpy as np
import nibabel as nib
from tqdm import tqdm
from glob import glob
from config import cfg

def _find_modality(brain_dir, tag, exclude_tag=None):
    """Return the path of the '*_<tag>*.nii.gz' file located in brain_dir.

    Files that also match '*_<exclude_tag>*.nii.gz' are ignored, and the
    alphabetically first remaining match is returned so that the choice does
    not depend on the arbitrary order in which glob lists a directory.
    """
    def _matches(name):
        return set(glob(os.path.join(brain_dir, '*_{}*.nii.gz'.format(name))))

    candidates = _matches(tag)
    if exclude_tag is not None:
        candidates -= _matches(exclude_tag)
    if not candidates:
        raise FileNotFoundError(
            "No '*_{}*.nii.gz' file found in '{}'".format(tag, brain_dir))
    return sorted(candidates)[0]


def read_brain(brain_dir, mode='train', x0=42, x1=194, y0=29, y1=221, z0=2, z1=146):

    """
    Reads and crops the MRI modalities (nii.gz format) of a single brain.

    Parameters
    ----------
    brain_dir : string
        The path to a folder that contains the MRI modalities of a specific brain.
        The files are located through the patterns '*_flair*', '*_t1*', '*_t1ce*',
        '*_t2*' (and '*_seg*' in 'train' mode), all ending in '.nii.gz'.
    mode : string
        'train' (the segmentation is loaded as well) or 'validation' (modalities
        only). The default is 'train'.
    x0, x1, y0, y1, z0, z1 : int
        The coordinates used to crop every volume: volume[x0:x1, y0:y1, z0:z1].
        One can calculate them from the non-zero voxels of the dataset. According
        to the original author, the cropped sizes must be divisible by the network
        downscale rate.

    Returns
    -------
    all_modalities : array
        int16 array holding the cropped volumes in the order flair, t1, t1ce, t2
        (+ gt as the last channel if mode='train'). The stacked volumes are passed
        through a plain np.transpose, which reverses every axis, so for 3D inputs
        the shape is [z1-z0, y1-y0, x1-x0, number_of_volumes].
    brain_affine : array
        The affine matrix of the last volume that was loaded. It is returned
        unchanged (the crop is not applied to it); all volumes of a brain are
        assumed to share the same affine.
    brain_name : str
        '<parent folder name>_<brain folder name>', e.g. tumor grade + case name.

    Raises
    ------
    ValueError
        If mode is neither 'train' nor 'validation'.
    FileNotFoundError
        If one of the required files is missing from brain_dir.

    """

    # reject an unknown mode up front instead of failing later on an unbound name
    if mode not in ('train', 'validation'):
        raise ValueError("mode must be 'train' or 'validation', got {!r}".format(mode))

    brain_dir = os.path.normpath(brain_dir)
    modalities_dir = [
        _find_modality(brain_dir, 'flair'),
        # '*_t1*' also matches the t1ce file, so it has to be excluded explicitly
        _find_modality(brain_dir, 't1', exclude_tag='t1ce'),
        _find_modality(brain_dir, 't1ce'),
        _find_modality(brain_dir, 't2'),
    ]
    if mode == 'train':
        modalities_dir.append(_find_modality(brain_dir, 'seg'))

    all_modalities = []
    for modality in modalities_dir:
        nifti_file  = nib.load(modality)
        brain_numpy = np.asarray(nifti_file.dataobj)
        # crop before stacking so that only the region of interest is kept in memory
        all_modalities.append(brain_numpy[x0:x1, y0:y1, z0:z1])

    # the affine of the last loaded volume is kept; it is only needed later on
    # to write predictions back to a nii.gz file
    brain_affine   = nifti_file.affine
    all_modalities = np.rint(np.array(all_modalities)).astype(np.int16)
    # channels-last layout for keras: every axis is reversed, the volume axis goes last
    all_modalities = np.transpose(all_modalities)
    # parent folder (tumor grade) + brain folder name
    brain_name     = os.path.basename(os.path.split(brain_dir)[0]) + '_' + os.path.basename(brain_dir)

    return all_modalities, brain_affine, brain_name
    


def _save_train_folds(brain_names, save_dir, k_fold):
    """Save one 'fold<i>_idx.npy' file of sorted training indices per fold."""
    all_HGG_names = [name for name in brain_names if 'HGG' in name]
    all_LGG_names = [name for name in brain_names if 'LGG' in name]

    # fixed seed, so the same folds are produced on every run
    np.random.seed(100)
    np.random.shuffle(all_HGG_names)
    np.random.shuffle(all_LGG_names)

    # integer division: a float product such as (1/k)*n can land just below the
    # exact quotient and lose one sample after truncation
    HGG_val_size = len(all_HGG_names) // k_fold
    LGG_val_size = len(all_LGG_names) // k_fold

    # position of the first occurrence of every name (what list.index returns)
    name_to_idx = {}
    for idx, name in enumerate(brain_names):
        name_to_idx.setdefault(name, idx)

    for fold in range(k_fold):
        chosen_HGG_val = set(all_HGG_names[fold * HGG_val_size:(fold + 1) * HGG_val_size])
        chosen_LGG_val = set(all_LGG_names[fold * LGG_val_size:(fold + 1) * LGG_val_size])

        # saving train_idx is enough: everything that is not held out in this fold
        train  = [name for name in all_HGG_names if name not in chosen_HGG_val]
        train += [name for name in all_LGG_names if name not in chosen_LGG_val]
        train_idx = sorted(name_to_idx[name] for name in train)

        np.save(os.path.join(save_dir, 'fold{}_idx.npy'.format(fold)), train_idx)


def create_table(dataset_dir, table_data_shape, save_dir, crop_coordinates, data_channels, k_fold=None):

    """
    Reads and saves all brain volumes into a single table file ('data.hdf5').

    The file contains four nodes: 'data' (uint16 modalities), 'truth' (uint8
    labels, with label 4 stored as 3), 'affine' (float32 4x4 matrices) and
    'brain_names'. Row i of every node belongs to the same brain.

    Parameters
    ----------
    dataset_dir : str
        A glob pattern that matches all brain folders (ex: suppose we have a folder
        'BraTS2019' that contains two HGG and LGG folders each of which contains
        some folders so: dataset_dir="./BraTS2019/*/*"). The matches are processed
        in sorted order.
    table_data_shape : tuple
        The shape of a single cropped brain volume as stored in the table
        (without the channel axis). It has to match what read_brain returns.
    save_dir : str
        The folder in which 'data.hdf5' and the fold files are saved.
    crop_coordinates : dict
        Keyword arguments forwarded to read_brain (x0, x1, y0, y1, z0, z1).
    data_channels : int
        Number of data channels/modalities stored per brain.
    k_fold : int
        k-fold cross-validation
        if specified, k .npy files ('fold<i>_idx.npy') will be saved. Each of these
        files holds the indexes of the brain volumes used for training in that fold.
        The split is done separately for the names containing 'HGG' and 'LGG';
        names containing neither are not part of any fold file.

    Returns
    -------
    None

    Raises
    ------
    FileNotFoundError
        If dataset_dir does not match any path. The check happens before the
        output file is opened for writing.

    """

    all_brains_dir = sorted(glob(dataset_dir))
    if not all_brains_dir:
        raise FileNotFoundError("No brain folders match '{}'".format(dataset_dir))

    n_brains     = len(all_brains_dir)
    filters      = tables.Filters(complevel=5, complib='blosc')
    # leading 0: the arrays are extendable along the first (brain) axis
    data_shape   = (0,) + tuple(table_data_shape) + (data_channels,)
    truth_shape  = (0,) + tuple(table_data_shape)
    affine_shape = (0, 4, 4)

    brain_names = []
    # the context manager closes the file even if reading a brain fails
    with tables.open_file(os.path.join(save_dir, 'data.hdf5'), mode='w') as hdf5_file:
        data_storage   = hdf5_file.create_earray(hdf5_file.root, 'data', tables.UInt16Atom(), shape=data_shape,
                                                   filters=filters, expectedrows=n_brains)
        truth_storage  = hdf5_file.create_earray(hdf5_file.root, 'truth', tables.UInt8Atom(), shape=truth_shape,
                                                   filters=filters, expectedrows=n_brains)
        affine_storage = hdf5_file.create_earray(hdf5_file.root, 'affine', tables.Float32Atom(), shape=affine_shape,
                                                   filters=filters, expectedrows=n_brains)

        for brain_dir in tqdm(all_brains_dir):
            all_modalities, brain_affine, brain_name = read_brain(brain_dir, mode='train', **crop_coordinates)
            # in 'train' mode the last channel is the ground truth, the rest are modalities
            brain = all_modalities[..., :-1]
            gt    = all_modalities[..., -1]

            # in BraTS 2017, 2018, 2019 there is no '3' label, so 4 is stored as 3
            gt[gt == 4] = 3
            brain_names.append(brain_name)
            data_storage.append(brain[np.newaxis, ...])
            truth_storage.append(gt[np.newaxis, ...])
            affine_storage.append(brain_affine[np.newaxis, ...])

        hdf5_file.create_array(hdf5_file.root, 'brain_names', obj=brain_names)

    if k_fold:
        _save_train_folds(brain_names, save_dir, k_fold)
    
   
    
if __name__ == '__main__':
    # Script entry point: build the table (and the optional fold files) from
    # the values stored in the project configuration.
    create_table(
        dataset_dir=cfg['data_dir'],
        table_data_shape=cfg['table_data_shape'],
        save_dir=cfg['save_data_dir'],
        crop_coordinates=cfg['crop_coord'],
        data_channels=cfg['data_channels'],
        k_fold=cfg['k_fold'],
    )