"""Pre-process the iSeg-2017 training set into Caffe-style HDF5 volumes.

For each training subject the T1/T2 volumes are intensity-normalised over
the brain mask, cropped to the non-zero bounding box plus a patch-sized
margin, stacked as channels, reordered to ``num x channel x d x h x w``,
and written to one HDF5 file per subject; each file path is appended to
``train_list.txt``.

Reference: https://github.com/zhengyang-wang/Unet_3D/tree/master/preprocessing
"""
import os

import numpy as np

# Path to the raw dataset (.img/.hdr files)
data_path = '/media/toanhoi/Study/databaseSeg/ISeg/iSeg-2017-Training'
# Directory the generated HDF5 files are written to
target_path = '/media/toanhoi/Study/databaseSeg/ISeg/hdf5_iseg_data'

# Edge length of the cubic training patches; half of it is kept as context
# margin around the brain bounding box when cropping.
TRAIN_PATCH_SIZE = 64
# 0-based index of the subject held out for validation (subject 9).
VALIDATION_SUBJECT = 8
NUM_SUBJECTS = 10

# Raw iSeg label intensities -> consecutive class ids.
_LABEL_MAP = {10: 1, 150: 2, 250: 3}


def _scan_bounds(nonzero_flags, size):
    """Return first/last inclusive indices where ``nonzero_flags`` is True.

    Mirrors the original two-pointer scan exactly: for an all-False input
    the result is ``(size, size - 1)``, i.e. an empty slice range.
    """
    start = 0
    while start < size and not nonzero_flags[start]:
        start += 1
    end = size - 1
    while end > start and not nonzero_flags[end]:
        end -= 1
    return start, end


def cut_edge(data, keep_margin):
    """Compute the non-zero bounding box of a 3-D volume.

    Args:
        data: 3-D array of shape (D, H, W); zero voxels are background.
            Uses ``np.any`` rather than the original ``sum() != 0`` test,
            which is equivalent for the non-negative masks used here and
            also correct for signed data.
        keep_margin: number of voxels to pad the box on every side,
            clamped to the volume borders.

    Returns:
        (D_s, D_e, H_s, H_e, W_s, W_e): inclusive start/end index per axis.
    """
    D, H, W = data.shape
    D_s, D_e = _scan_bounds(np.any(data, axis=(1, 2)), D)
    H_s, H_e = _scan_bounds(np.any(data, axis=(0, 2)), H)
    W_s, W_e = _scan_bounds(np.any(data, axis=(0, 1)), W)

    if keep_margin != 0:
        D_s = max(0, D_s - keep_margin)
        D_e = min(D - 1, D_e + keep_margin)
        H_s = max(0, H_s - keep_margin)
        H_e = min(H - 1, H_e + keep_margin)
        W_s = max(0, W_s - keep_margin)
        W_e = min(W - 1, W_e + keep_margin)

    return int(D_s), int(D_e), int(H_s), int(H_e), int(W_s), int(W_e)


def convert_label(label_img):
    """Map iSeg label intensities {0, 10, 150, 250} to class ids {0, 1, 2, 3}.

    Unlike the original per-slice version this does NOT mutate the input
    array (the old code modified ``label_img`` through slice views as a
    side effect).

    Args:
        label_img: 3-D label volume with raw iSeg intensities.

    Returns:
        uint8 volume of the same shape with consecutive class ids.
    """
    label_processed = np.zeros(label_img.shape, dtype=np.uint8)
    for raw_value, class_id in _LABEL_MAP.items():
        label_processed[label_img == raw_value] = class_id
    return label_processed


def build_h5_dataset(data_path, target_path):
    """Build the HDF5 training dataset from the raw iSeg volumes.

    Reads ``subject-N-{T1,T2,label}.hdr`` for every training subject
    (skipping the validation subject), normalises, crops, and writes one
    ``train_iseg_norm_cutedge_weight_N.h5`` per subject plus an entry in
    ``./train_list.txt``.
    """
    # Heavy I/O deps are imported lazily so cut_edge / convert_label stay
    # usable (and testable) without medpy / h5py installed.
    from medpy.io import load
    import h5py

    for i in range(NUM_SUBJECTS):
        # Skip subject 9: held out for validation.
        if i == VALIDATION_SUBJECT:
            continue
        subject_name = 'subject-%d-' % (i + 1)
        img_T1, _ = load(os.path.join(data_path, subject_name + 'T1.hdr'))
        img_T2, _ = load(os.path.join(data_path, subject_name + 'T2.hdr'))
        raw_labels, _ = load(os.path.join(data_path, subject_name + 'label.hdr'))

        inputs_T1 = img_T1.astype(np.float32)
        inputs_T2 = img_T2.astype(np.float32)
        labels = convert_label(raw_labels.astype(np.uint8))
        mask = labels > 0

        # Normalise each modality with brain-only (masked) statistics.
        inputs_T1 = (inputs_T1 - inputs_T1[mask].mean()) / inputs_T1[mask].std()
        inputs_T2 = (inputs_T2 - inputs_T2[mask].mean()) / inputs_T2[mask].std()

        # Crop to the brain bounding box plus half a training patch of
        # context. NOTE: the original used 64/2, a float under Python 3;
        # integer division keeps the margin an int.
        margin = TRAIN_PATCH_SIZE // 2
        D_s, D_e, H_s, H_e, W_s, W_e = cut_edge(mask.astype(np.uint8), margin)
        box = (slice(D_s, D_e + 1), slice(H_s, H_e + 1), slice(W_s, W_e + 1))
        inputs_T1 = inputs_T1[box]
        inputs_T2 = inputs_T2[box]
        labels_box = labels[box][:, :, :, None]

        # Stack modalities as the trailing channel axis: (d, h, w, 2).
        inputs = np.stack((inputs_T1, inputs_T2), axis=3)

        print(inputs.shape, labels_box.shape)

        # Add a leading batch axis and reorder for Caffe:
        # num x channel x d x h x w (same transpose as the original).
        inputs_caffe = inputs[None].transpose(0, 4, 3, 1, 2)
        labels_caffe = labels_box[None].transpose(0, 4, 3, 1, 2)

        h5_path = os.path.join(
            target_path, 'train_iseg_norm_cutedge_weight_%s.h5' % (i + 1))
        with h5py.File(h5_path, 'w') as f:
            f['data'] = inputs_caffe   # float32, n x c x d x h x w
            f['label'] = labels_caffe  # uint8, same layout

        with open('./train_list.txt', 'a') as f:
            f.write(h5_path + '\n')


if __name__ == '__main__':
    if not os.path.exists(target_path):
        os.makedirs(target_path)
    # Start from a clean list so reruns do not accumulate duplicate entries.
    if os.path.exists("./train_list.txt"):
        os.remove("./train_list.txt")
    build_h5_dataset(data_path, target_path)