pancreas-seg / Git / [48d89d] /slice.py

Models:
SCallahan/
pancreas-seg
Downloads: 1
[48d89d]: / slice.py
History
Download this file
181 lines (144 with data), 7.1 kB

"""
details
-------
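1. Collect the file lists of images and labels (a.k.a. masks / ground truth).
2. Slice each case along the Z dimension (only Z is handled for now; X/Y are not sliced).
3. For every slice, save a record of [case number, slice number, slice name, label name,
   average pixel value of the slice, and, for each organ, the total pixel number of
   ground truth and its bounding box (minA, maxA, minB, maxB)].
4. Write the training and testing case lists for every fold.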

This code is adapted from https://github.com/198808xc/OrganSegC2F/blob/master/OrganSegC2F/init.py
"""

import numpy as np
import os
import sys
import time
from utils import *


# Read the command-line arguments:
#   argv[1]  data_path     data directory (a trailing "/" is appended)
#   argv[2]  organ_number  highest organ label id; statistics are collected for 1..organ_number
#   argv[3]  folds         number of folds for the training/testing lists
#   argv[4]  low_range     lower bound used when thresholding slice intensities
#   argv[5]  high_range    upper bound used when thresholding slice intensities
if len(sys.argv) < 6:
    # Exit with a usage hint instead of an opaque IndexError traceback.
    sys.exit('Usage: python slice.py <data_path> <organ_number> <folds> '
             '<low_range> <high_range>')

data_path = sys.argv[1] + "/"
organ_number = int(sys.argv[2])
folds = int(sys.argv[3])
low_range = int(sys.argv[4])
high_range = int(sys.argv[5])

def _collect_files(root, keyword=''):
    """Return the files found under *root* whose names contain *keyword*.

    The tree is traversed with ``os.walk``; inside each directory the file
    names are visited in sorted order.

    Args:
        root: directory to search recursively.
        keyword: substring a file name must contain; the default empty
            string matches every file.

    Returns:
        A ``(paths, stems)`` pair of parallel lists: the joined path of each
        matching file, and its file name with the extension stripped.
    """
    paths = []
    stems = []
    for directory, _, names in os.walk(root):
        for filename in sorted(names):
            if keyword in filename:
                paths.append(os.path.join(directory, filename))
                stems.append(os.path.splitext(filename)[0])
    return paths, stems


if __name__ == "__main__":
    # An empty keyword keeps every file that is found.
    keyword = ''

    # get image list
    # NOTE(review): image_path / label_path / list_training are not defined in
    # this file; presumably they come from `from utils import *` -- confirm.
    image_list, image_filename = _collect_files(image_path, keyword)

    # get label list
    label_list, label_filename = _collect_files(label_path, keyword)

    # Images and labels are paired by list position further down, so the
    # two lists must have the same length.
    if len(image_list) != len(label_list):
        sys.exit('Error: the number of labels and the number of images are not equal!')

    total_samples = len(image_list)

    # Create (or truncate) the per-plane slice list; the slicing loop appends to it.
    for plane in ['Z']:
        with open(list_training[plane], 'w'):
            pass

    print('Initialization starts.')

# Iterate through all samples: cut each volume into slices along the third
# axis, save every slice, and append one statistics line per slice to the
# plane's list file.
# NOTE(review): image_path_, label_path_, list_training and is_organ are not
# defined in this file; presumably they come from `from utils import *`.
for i in range(total_samples):
    start_time = time.time()
    print('Processing ' + str(i + 1) + ' out of ' + str(len(image_list)) + ' files.')

    image = np.load(image_list[i])
    label = np.load(label_list[i])

    # only z for now
    for plane in ['Z']:
        # slice_number is the number of slices along the third axis of the label volume
        slice_number = label.shape[2]

        image_directory_ = os.path.join(image_path_[plane], image_filename[i])
        if not os.path.exists(image_directory_):
            os.makedirs(image_directory_)

        label_directory_ = os.path.join(label_path_[plane], label_filename[i])
        if not os.path.exists(label_directory_):
            os.makedirs(label_directory_)

        print('    Slicing data: ' + str(time.time() - start_time) + ' second(s) elapsed.')

        # The np.int / np.float aliases were removed in NumPy 1.24; the
        # builtin int / float select the same dtypes.
        stats_shape = (slice_number, organ_number + 1)
        # total number of ground-truth pixels per (slice, organ)
        sum_ = np.zeros(stats_shape, dtype=int)
        # bounding boxes of ground-truth masks (A_min, A_max, B_min, B_max);
        # they stay 0 for organs that are absent from a slice
        minA = np.zeros(stats_shape, dtype=int)
        maxA = np.zeros(stats_shape, dtype=int)
        minB = np.zeros(stats_shape, dtype=int)
        maxB = np.zeros(stats_shape, dtype=int)
        # mean pixel value of each slice
        average = np.zeros(slice_number, dtype=float)

        # (image file, label file) of every slice, reused by the listing pass below
        slice_files = []

        # iterate through all slices of current case i and current plane
        for j in range(slice_number):
            # sample path: <plane dir> / <case name> / <4-digit slice index>.npy
            slice_name = '{:0>4}'.format(j) + '.npy'
            image_filename_ = os.path.join(image_directory_, slice_name)
            label_filename_ = os.path.join(label_directory_, slice_name)
            slice_files.append((image_filename_, label_filename_))

            image_ = image[:, :, j]
            label_ = label[:, :, j]

            # threshold image to specified range ([-100, 240] for pancreas);
            # image_ is a slice of image, so the assignment also updates image
            image_[image_ < low_range] = low_range
            image_[image_ > high_range] = high_range

            # save sliced image and label unless both files already exist
            if not os.path.isfile(image_filename_) or not os.path.isfile(label_filename_):
                np.save(image_filename_, image_)
                np.save(label_filename_, label_)

            # compute the mean value of the slice
            average[j] = float(image_.sum()) / (image_.shape[0] * image_.shape[1])

            for o in range(1, organ_number + 1):
                # evaluate the organ mask once and reuse it for both statistics
                mask = is_organ(label_, o)
                # number of ground-truth pixels of organ o in this slice
                sum_[j, o] = mask.sum()
                # coordinates of the ground-truth pixels
                arr = np.nonzero(mask)

                # bounding box of the mask; left at 0 when the organ is absent
                if len(arr[0]):
                    minA[j, o] = arr[0].min()
                    maxA[j, o] = arr[0].max()
                    minB[j, o] = arr[1].min()
                    maxB[j, o] = arr[1].max()

        # Append one line per slice to the plane's list file. The file is
        # opened once per case (not once per slice) and is always closed.
        with open(list_training[plane], 'a') as output:
            for j in range(slice_number):
                image_filename_, label_filename_ = slice_files[j]
                # case number, slice number, image file, label file, slice mean
                fields = [str(i), str(j), image_filename_, label_filename_,
                          str(average[j])]
                # per organ: ground-truth pixel count and bounding box
                # (A_min, A_max, B_min, B_max)
                for o in range(1, organ_number + 1):
                    fields.extend([str(sum_[j, o]), str(minA[j, o]), str(maxA[j, o]),
                                   str(minB[j, o]), str(maxB[j, o])])
                output.write(' '.join(fields) + '\n')

        print('  ' + plane + ' plane is done: ' +
              str(time.time() - start_time) + ' second(s) elapsed.')

    print('Processed ' + str(i + 1) + ' out of ' + str(len(image_list)) + ' files: ' +
          str(time.time() - start_time) + ' second(s) elapsed.')


# Create one training image list per fold (the number of lists equals `folds`).
# NOTE(review): training_set_filename and in_training_set are not defined in
# this file; presumably they come from `from utils import *`.
print('Writing training image list.')
for f in range(folds):
    list_training_ = training_set_filename(f)
    # The context manager closes the file even if a write fails.
    with open(list_training_, 'w') as output:
        for i in range(total_samples):
            # one line per case that belongs to the training set of fold f
            if in_training_set(total_samples, i, folds, f):
                output.write(str(i) + ' ' + image_list[i] + ' ' + label_list[i] + '\n')

# Create one testing image list per fold (the number of lists equals `folds`).
# NOTE(review): testing_set_filename and in_training_set are not defined in
# this file; presumably they come from `from utils import *`.
print('Writing testing image list.')
for f in range(folds):
    list_testing_ = testing_set_filename(f)
    # The context manager closes the file even if a write fails.
    with open(list_testing_, 'w') as output:
        for i in range(total_samples):
            # every case that is not in the training set of fold f is a test case
            if not in_training_set(total_samples, i, folds, f):
                output.write(str(i) + ' ' + image_list[i] + ' ' + label_list[i] + '\n')

print('Initialization is done.')