3D_DenseSeg / Git / [168bda] /prepare_hdf5

Models:
SCallahan/
3D_DenseSeg
Downloads: 1
[168bda]: / prepare_hdf5_cutedge.py
History
Download this file
125 lines (108 with data), 4.3 kB

from medpy.io import load
import numpy as np
import os
import h5py

# Path to your dataset: the directory holding the subject-<n>-T1 / T2 / label
# .hdr files (with their .img data) that build_h5_dataset loads.
data_path = '/media/toanhoi/Study/databaseSeg/ISeg/iSeg-2017-Training'
# Directory the generated HDF5 files are saved to; it is created if missing
# when this file is run as a script.
target_path = '/media/toanhoi/Study/databaseSeg/ISeg/hdf5_iseg_data'
# Reference for the preprocessing below:
# https://github.com/zhengyang-wang/Unet_3D/tree/master/preprocessing
def cut_edge(data, keep_margin):
    '''
    Find the bounding box of the non-zero part of a 3-D volume.

    Along each of the three axes, slabs whose sum is zero are skipped from
    both ends. The resulting box is then widened by keep_margin on every
    side and clamped to the volume.

    Args:
        data: 3-D array; its shape is unpacked as (D, H, W).
        keep_margin: number of extra indices kept around the box on each
            side. 0 keeps the tight box. A float is accepted because the
            results are converted with int().

    Returns:
        Tuple (D_s, D_e, H_s, H_e, W_s, W_e) of Python ints. Every end
        index is inclusive, so slice with [start:end + 1].
    '''
    depth, height, width = data.shape

    def _extent(axis, size):
        # With `axis` moved to the front, slabs[i] is the same view as
        # data[i], data[:, i] or data[:, :, i] for axis 0, 1 or 2.
        slabs = np.moveaxis(data, axis, 0)

        # Walk forward past the leading all-zero slabs.
        first = 0
        while first < size and slabs[first].sum() == 0:
            first += 1

        # Walk backward past the trailing all-zero slabs, never crossing
        # the start index found above.
        last = size - 1
        while last > first and slabs[last].sum() == 0:
            last -= 1

        if keep_margin != 0:
            first = max(0, first - keep_margin)
            last = min(size - 1, last + keep_margin)
        return int(first), int(last)

    bounds = []
    for axis, size in enumerate((depth, height, width)):
        bounds.extend(_extent(axis, size))
    return tuple(bounds)

def convert_label(label_img):
    '''
    Remap the label codes 10, 150 and 250 to the class indices 1, 2 and 3.

    Every other value is kept and only cast to uint8. The input array is
    not modified: the remapping is applied to a uint8 copy.

    Args:
        label_img: array of label codes (presumably the iSeg-2017 label
            volume, where 10 / 150 / 250 mark the three tissue classes;
            confirm against the dataset description).

    Returns:
        New uint8 array with the same shape as label_img.
    '''
    source = np.asarray(label_img)
    # astype() returns a fresh array, so the writes below never reach the
    # caller's data.
    label_processed = source.astype(np.uint8)
    for raw_value, class_index in ((10, 1), (150, 2), (250, 3)):
        # Build each mask from the untouched source values, not from the
        # partially remapped copy.
        label_processed[source == raw_value] = class_index
    return label_processed

def build_h5_dataset(data_path, target_path, num_subjects=10, val_subject=9,
                     patch_size=64, list_path='./train_list.txt'):
    '''
    Build HDF5 Image Dataset.

    For every training subject this loads the T1, T2 and label volumes,
    normalizes both modalities with the mean/std of the labelled voxels,
    crops all three volumes to the labelled region plus a margin, and writes
    the result to one HDF5 file in target_path. The path of each written
    file is appended as one line to list_path.

    Args:
        data_path: directory holding 'subject-<n>-T1.hdr',
            'subject-<n>-T2.hdr' and 'subject-<n>-label.hdr'.
        target_path: existing directory the .h5 files are written to.
        num_subjects: subjects are numbered 1..num_subjects.
        val_subject: subject number that is skipped (kept for validation);
            pass None to process every subject.
        patch_size: training patch size; half of it is kept as margin
            around the labelled region when cropping.
        list_path: text file that receives one output path per line
            (opened in append mode).

    Raises:
        ValueError: if a subject's label volume has no voxel > 0, because
            the normalization statistics would be undefined.
    '''
    # Floor division keeps the margin an integer on Python 3 as well.
    margin = patch_size // 2   # training_patch_size / 2

    for subject_id in range(1, num_subjects + 1):
        # Skip the held-out subject (subject 9 by default) for validation
        if subject_id == val_subject:
            continue
        subject_name = 'subject-%d-' % subject_id
        img_T1, _ = load(os.path.join(data_path, subject_name + 'T1.hdr'))
        img_T2, _ = load(os.path.join(data_path, subject_name + 'T2.hdr'))
        labels, _ = load(os.path.join(data_path, subject_name + 'label.hdr'))

        inputs_T1 = img_T1.astype(np.float32)
        inputs_T2 = img_T2.astype(np.float32)
        labels = convert_label(labels.astype(np.uint8))
        mask = labels > 0
        if not mask.any():
            raise ValueError(
                'No labelled voxels found for %slabel.hdr' % subject_name)

        # Normalization: zero mean / unit std measured on labelled voxels only
        inputs_T1_norm = (inputs_T1 - inputs_T1[mask].mean()) / inputs_T1[mask].std()
        inputs_T2_norm = (inputs_T2 - inputs_T2[mask].mean()) / inputs_T2[mask].std()

        # Cut edge: cut_edge returns inclusive end indices, hence the + 1
        min_D_s, max_D_e, min_H_s, max_H_e, min_W_s, max_W_e = cut_edge(
            mask.astype(np.uint8), margin)
        crop = (slice(min_D_s, max_D_e + 1),
                slice(min_H_s, max_H_e + 1),
                slice(min_W_s, max_W_e + 1))

        # Append a trailing channel axis to each cropped volume
        inputs_tmp_T1 = inputs_T1_norm[crop][:, :, :, None]
        inputs_tmp_T2 = inputs_T2_norm[crop][:, :, :, None]
        labels_tmp = labels[crop][:, :, :, None]

        # Channel 0 is T1, channel 1 is T2
        inputs = np.concatenate((inputs_tmp_T1, inputs_tmp_T2), axis=3)

        print(inputs.shape, labels_tmp.shape)

        # Add a leading batch axis, then reorder to
        # (batch, channel, axis 2, axis 0, axis 1) of the cropped volume.
        # NOTE(review): the original comment calls this layout
        # "num channel x d x h x w"; confirm the axis order is what the
        # network expects.
        inputs_caffe = inputs[None, :, :, :, :].transpose(0, 4, 3, 1, 2)
        labels_caffe = labels_tmp[None, :, :, :, :].transpose(0, 4, 3, 1, 2)

        # Build the output path once; it is both written to and listed
        h5_path = os.path.join(
            target_path, 'train_iseg_norm_cutedge_weight_%s.h5' % subject_id)
        with h5py.File(h5_path, 'w') as f:
            f['data'] = inputs_caffe  # for caffe num channel x d x h x w
            f['label'] = labels_caffe

        # Append after each subject so the list reflects the files written
        # so far
        with open(list_path, 'a') as f:
            f.write(h5_path + '\n')

if __name__ == '__main__':
    # Make sure the output directory exists before any file is written.
    output_dir_missing = not os.path.exists(target_path)
    if output_dir_missing:
        os.makedirs(target_path)

    # build_h5_dataset appends to the list file, so drop any list left over
    # from a previous run first.
    stale_list = "./train_list.txt"
    if os.path.exists(stale_list):
        os.remove(stale_list)

    build_h5_dataset(data_path, target_path)