pancreas-seg / Git / Diff of /data.py

Models:

SCallahan/

pancreas-seg

Downloads: 1

Diff of /data.py [000000] .. [48d89d]

Switch to unified view

 b/data.py
+"""
+This module is used to:
+- Create the train & test inputs to the network as numpy arrays
+- Load the train & test numpy arrays
+"""
+import numpy as np
+from utils import *
+# data type to save as np array
+npdtype = np.float32
+def create_train_data(current_fold, plane):
+    """
+    Crop each slice by its ground truth bounding box,
+    then pad zeros to form uniform dimension,
+    rescale pixel intensities to [0,1]
+    """
+    # get the list of image and label number of current_fold
+    imlb_list = open(training_set_filename(current_fold), 'r').read().splitlines()
+    current_fold = current_fold
+    training_image_set = np.zeros((len(imlb_list)), dtype = np.int)
+    for i in range(len(imlb_list)):
+        s = imlb_list[i].split(' ')
+        training_image_set[i] = int(s[0])
+    slice_list = open(list_training[plane], 'r').read().splitlines()
+    slices = len( slice_list)
+    image_ID = np.zeros(( slices), dtype = np.int)
+    slice_ID = np.zeros(( slices), dtype = np.int)
+    image_filename = ['' for l in range( slices)]
+    label_filename = ['' for l in range( slices)]
+    pixels = np.zeros((slices), dtype = np.int)
+    for l in range(slices):
+        s =  slice_list[l].split(' ')
+        image_ID[l] = s[0]
+        slice_ID[l] = s[1]
+        image_filename[l] = s[2]
+        label_filename[l] = s[3]
+        pixels[l] = int(s[organ_ID * 5])
+    create_slice_list = []
+    create_label_list = []
+    for l in range(slices):
+        if image_ID[l] in training_image_set and pixels[l] >= 100:
+            create_slice_list.append(image_filename[l])
+            create_label_list.append(label_filename[l])
+    if len(create_slice_list)!= len(create_label_list):
+        raise ValueError('slice number does not equal label number!')
+    total = len(create_slice_list)
+    img_rows = XMAX
+    img_cols = YMAX
+    imgs = np.ndarray((total, img_rows, img_cols), dtype = npdtype)
+    imgs_mask = np.ndarray((total, img_rows, img_cols), dtype = npdtype)
+    print('-'*30)
+    print('  Creating training data...')
+    print('-'*30)
+    for i in range(len(create_slice_list)):
+        cur_im = np.load(create_slice_list[i])
+        cur_mask = np.load(create_label_list[i])
+        cur_im = (cur_im - low_range) / float(high_range - low_range)
+        arr = np.nonzero(cur_mask)
+        width = cur_mask.shape[0]
+        height = cur_mask.shape[1]
+        minA = min(arr[0])
+        maxA = max(arr[0])
+        minB = min(arr[1])
+        maxB = max(arr[1])
+        # with margin
+        cropped_im = cur_im[max(minA - margin, 0): min(maxA + margin + 1, width), \
+                                    max(minB - margin, 0): min(maxB + margin + 1, height)]
+        cropped_mask = cur_mask[max(minA - margin, 0): min(maxA + margin + 1, width), \
+                                    max(minB - margin, 0): min(maxB + margin + 1, height)]
+        imgs[i] = pad_2d(cropped_im, plane, 0, XMAX, YMAX, ZMAX)
+        imgs_mask[i] = pad_2d(cropped_mask, plane, 0, XMAX, YMAX, ZMAX)
+        if i % 100 == 0:
+            print('Done: {0}/{1} slices'.format(i, total))
+    np.save('imgs_train_%s_%s.npy'%(current_fold, plane), imgs)
+    np.save('masks_train_%s_%s.npy'%(current_fold, plane), imgs_mask)
+    print('Training data created for fold %s, plane %s'%(current_fold, plane))
+def load_train_data(current_fold, plane):
+    imgs_train = np.load('imgs_train_%s_%s.npy'%(current_fold, plane))
+    mask_train = np.load('masks_train_%s_%s.npy'%(current_fold, plane))
+    return imgs_train, mask_train
+def load_test_data(current_fold, plane):
+    imgs_test = np.load('imgs_test_%s_%s.npy'%(current_fold, plane))
+    mask_test = np.load('masks_test_$s_%s.npy'%(current_fold, plane))
+    return imgs_test, mask_test
+if __name__ == '__main__':
+    data_path = sys.argv[1]
+    current_fold = int(sys.argv[2])
+    plane = sys.argv[3]
+    # dim of each case (after padding zeors to max gt bounding box)
+    ZMAX = int(sys.argv[4])
+    YMAX = int(sys.argv[5])
+    XMAX = int(sys.argv[6])
+    margin = int(sys.argv[7])
+    organ_ID = int(sys.argv[8])
+    low_range = int(sys.argv[9])
+    high_range = int(sys.argv[10])
+    create_train_data(current_fold, plane)