import numpy as np
import utils_lung
import pathfinder
import utils
# Anchor points for a piecewise-linear map from nodule diameter to malignancy
# prevalence. Each (slope, offset) pair describes one linear segment.
# The prevalence of malignancy is 0% to 1% for nodules <5 mm,
# 6% to 28% for nodules 5 to 10 mm,
prob5 = (0.01 + 0.06) / 2.
slope10 = (0.28 - prob5) / (10. - 5.)
offset10 = prob5 - slope10 * 5.
slope20 = (0.64 - 0.28) / (20. - 10.)
offset20 = 0.28 - slope20 * 10.
# and 64% to 82% for nodules >20 mm in diameter
slope25 = (0.82 - 0.64) / (25. - 20.)
offset25 = 0.64 - slope25 * 20.
slope30 = (0.93 - 0.82) / (30. - 25.)
offset30 = 0.82 - slope30 * 25.
# For nodules more than 3 cm in diameter, 93% to 97% are malignant
slope40 = (0.97 - 0.93) / (40. - 30.)
offset40 = 0.93 - slope40 * 30.


def diameter_to_prob(diam):
    """Map a scalar nodule diameter to a malignancy probability in [0, 1].

    Below 5 the value rises linearly from 0 to ``prob5``; from 5 upwards the
    segment whose upper bound is the first one greater than ``diam`` is used,
    and anything from 30 upwards follows the last segment. The result is
    clipped to [0, 1] with ``np.clip``.
    """
    if diam < 5:
        raw = prob5 * diam / 5.
    else:
        # Default to the last segment; it also extrapolates past 40.
        slope, offset = slope40, offset40
        segments = ((10, slope10, offset10),
                    (20, slope20, offset20),
                    (25, slope25, offset25),
                    (30, slope30, offset30))
        for upper, seg_slope, seg_offset in segments:
            if diam < upper:
                slope, offset = seg_slope, seg_offset
                break
        raw = slope * diam + offset
    return np.clip(raw, 0., 1.)
class LunaDataGenerator(object):
    """Yield one prepared LUNA scan at a time.

    Scan paths are built from ``patient_ids`` when given; otherwise every
    path returned by ``utils_lung.get_patient_data_paths`` that contains
    '.mhd' is used. Annotations are loaded from
    ``pathfinder.LUNA_LABELS_PATH``.
    """

    def __init__(self, data_path, transform_params, data_prep_fun, rng,
                 random, infinite, patient_ids=None, **kwargs):
        self.data_path = data_path
        self.transform_params = transform_params
        self.data_prep_fun = data_prep_fun
        self.rng = rng
        self.random = random
        self.infinite = infinite
        self.patient_ids = patient_ids

        if patient_ids:
            self.patient_paths = [data_path + '/' + p + '.mhd' for p in patient_ids]
        else:
            listed_paths = utils_lung.get_patient_data_paths(data_path)
            self.patient_paths = [p for p in listed_paths if '.mhd' in p]

        self.id2annotations = utils_lung.read_luna_annotations(pathfinder.LUNA_LABELS_PATH)
        self.nsamples = len(self.patient_paths)

    def generate(self):
        """Yield ``(x, y, None, annotations, tf_matrix, pid)`` per scan.

        ``x`` and ``y`` are cast to float32 and given two leading singleton
        axes. One pass is made over all scans (shuffled with ``self.rng``
        when ``self.random`` is set); passes repeat while ``self.infinite``.
        """
        while True:
            order = np.arange(self.nsamples)
            if self.random:
                self.rng.shuffle(order)

            for scan_idx in order:
                scan_path = self.patient_paths[scan_idx]
                pid = utils_lung.extract_pid_filename(scan_path)
                volume, origin, spacing = utils_lung.read_mhd(scan_path)
                x, y, annotations, tf_matrix = self.data_prep_fun(
                    data=volume,
                    pixel_spacing=spacing,
                    luna_annotations=self.id2annotations[pid],
                    luna_origin=origin)
                x = np.float32(x)[None, None, :, :, :]
                y = np.float32(y)[None, None, :, :, :]
                yield x, y, None, annotations, tf_matrix, pid

            if not self.infinite:
                break
class LunaSimpleDataGenerator(object):
    """Yield raw scans as float32 arrays together with their patient id.

    The file extension is '.pkl' when the substring 'pkl' occurs in
    ``data_path`` and '.mhd' otherwise.
    """

    def __init__(self, data_path, patient_ids=None, **kwargs):
        self.data_path = data_path
        self.patient_ids = patient_ids
        if 'pkl' in data_path:
            self.file_extension = '.pkl'
        else:
            self.file_extension = '.mhd'

        if patient_ids:
            self.patient_paths = [data_path + '/' + p + self.file_extension
                                  for p in patient_ids]
        else:
            listed_paths = utils_lung.get_patient_data_paths(data_path)
            self.patient_paths = [p for p in listed_paths
                                  if self.file_extension in p]
        self.nsamples = len(self.patient_paths)
        print(self.data_path)

    def generate(self):
        """Yield ``(x, pid)`` for every scan path, in list order."""
        for scan_path in self.patient_paths:
            pid = utils_lung.extract_pid_filename(scan_path)
            if self.file_extension == '.pkl':
                img, origin, pixel_spacing = utils_lung.read_pkl(scan_path)
            else:
                img, origin, pixel_spacing = utils_lung.read_mhd(scan_path)
            yield np.float32(img), pid
class LunaScanPositiveDataGenerator(LunaDataGenerator):
    """LunaDataGenerator restricted to patients that have annotations.

    After the base constructor runs, the scan list is rebuilt from only
    those patient ids that are keys of ``self.id2annotations``.
    """

    def __init__(self, data_path, transform_params, data_prep_fun, rng,
                 random, infinite, patient_ids=None, **kwargs):
        super(LunaScanPositiveDataGenerator, self).__init__(data_path, transform_params, data_prep_fun, rng,
                                                            random, infinite, patient_ids, **kwargs)
        patient_ids_all = [utils_lung.extract_pid_filename(p) for p in self.patient_paths]
        # Test membership on the mapping itself: `.keys()` builds a fresh
        # list for every pid on Python 2, making this filter quadratic.
        annotated = self.id2annotations
        patient_ids_pos = [pid for pid in patient_ids_all if pid in annotated]
        self.patient_paths = [data_path + '/' + p + '.mhd' for p in patient_ids_pos]
        self.nsamples = len(self.patient_paths)
class LunaScanPositiveLungMaskDataGenerator(LunaDataGenerator):
    """Per-scan generator whose prep function also returns a lung mask.

    ``batch_size`` and ``full_batch`` are accepted by the constructor but
    are not stored or used by this class.
    """

    def __init__(self, data_path, batch_size, transform_params, data_prep_fun, rng,
                 full_batch, random, infinite, patient_ids=None, **kwargs):
        base = super(LunaScanPositiveLungMaskDataGenerator, self)
        base.__init__(data_path, transform_params, data_prep_fun, rng,
                      random, infinite, patient_ids, **kwargs)

    def generate(self):
        """Yield ``(x, y, lung_mask, annotations, tf_matrix, pid)`` per scan.

        The three arrays are cast to float32 and given two leading
        singleton axes. Passes repeat while ``self.infinite`` is set.
        """
        while True:
            order = np.arange(self.nsamples)
            if self.random:
                self.rng.shuffle(order)

            for scan_idx in order:
                scan_path = self.patient_paths[scan_idx]
                pid = utils_lung.extract_pid_filename(scan_path)
                volume, origin, spacing = utils_lung.read_mhd(scan_path)
                x, y, lung_mask, annotations, tf_matrix = self.data_prep_fun(
                    data=volume,
                    pixel_spacing=spacing,
                    luna_annotations=self.id2annotations[pid],
                    luna_origin=origin)
                x = np.float32(x)[None, None, :, :, :]
                y = np.float32(y)[None, None, :, :, :]
                lung_mask = np.float32(lung_mask)[None, None, :, :, :]
                yield x, y, lung_mask, annotations, tf_matrix, pid

            if not self.infinite:
                break
class LunaScanMaskPositiveDataGenerator(LunaDataGenerator):
    """Per-scan generator that pairs each CT scan with a mask volume.

    Mask files are looked up as ``seg_data_path/<pid>.mhd``. ``batch_size``
    and ``full_batch`` are accepted but not used by this class.
    """

    def __init__(self, data_path, seg_data_path, batch_size, transform_params, data_prep_fun, rng,
                 full_batch, random, infinite, patient_ids=None, **kwargs):
        super(LunaScanMaskPositiveDataGenerator, self).__init__(data_path, transform_params,
                                                                data_prep_fun, rng,
                                                                random, infinite, patient_ids, **kwargs)
        self.seg_data_path = seg_data_path
        # When no ids were supplied the base class lists the directory, so
        # derive the ids from those paths. Iterating `self.patient_ids`
        # directly would fail on None and would leave `mask_paths` shorter
        # than `patient_paths` for an empty list.
        pids = self.patient_ids
        if not pids:
            pids = [utils_lung.extract_pid_filename(p) for p in self.patient_paths]
        self.mask_paths = [seg_data_path + '/' + p + '.mhd' for p in pids]

    def generate(self):
        """Yield ``(ct, lung_mask, annotations, tf_matrix, pid)`` per scan.

        Asserts that the CT and mask volumes share origin and pixel spacing
        (summed absolute difference below 1e-9). ``ct`` and ``lung_mask``
        are cast to float32 and given two leading singleton axes.
        """
        while True:
            rand_idxs = np.arange(self.nsamples)
            if self.random:
                self.rng.shuffle(rand_idxs)
            for idx in rand_idxs:
                ct_scan_path = self.patient_paths[idx]
                mask_path = self.mask_paths[idx]
                pid = utils_lung.extract_pid_filename(ct_scan_path)

                ct_scan, ct_origin, ct_pixel_spacing = utils_lung.read_mhd(ct_scan_path)
                mask, mask_origin, mask_pixel_spacing = utils_lung.read_mhd(mask_path)

                assert(sum(abs(ct_origin - mask_origin)) < 1e-9)
                assert(sum(abs(ct_pixel_spacing - mask_pixel_spacing)) < 1e-9)

                ct, lung_mask, annotations, tf_matrix = self.data_prep_fun(
                    ct_scan=ct_scan, mask=mask,
                    pixel_spacing=ct_pixel_spacing,
                    luna_annotations=self.id2annotations[pid],
                    luna_origin=ct_origin)

                ct = np.float32(ct)[None, None, :, :, :]
                lung_mask = np.float32(lung_mask)[None, None, :, :, :]
                yield ct, lung_mask, annotations, tf_matrix, pid

            if not self.infinite:
                break
# For lung segmentation.
# NOTE(review): originally marked "does not work yet"; the defects visible in
# this class are fixed below, but it has not been verified end to end.
class PatchLunaDataGenerator(object):
    """Batch generator of randomly centred CT / segmentation patch pairs.

    Scans are read from ``ct_data_path + pid + '.mhd'`` and segmentations
    from ``seg_data_path + pid + '.mhd'``; no separator is inserted, so both
    prefixes presumably end with a path separator (confirm against callers).
    """

    def __init__(self, ct_data_path, seg_data_path, batch_size, transform_params, data_prep_fun, rng,
                 full_batch, random, infinite, patient_ids=None, **kwargs):
        if patient_ids:
            self.patient_ids = patient_ids
        else:
            # The original referenced an undefined `data_path` and a never
            # assigned `self.patient_paths` here; list the CT directory and
            # keep the '.mhd' entries, as the other generators do.
            patient_paths = utils_lung.get_patient_data_paths(ct_data_path)
            mhd_paths = [p for p in patient_paths if '.mhd' in p]
            self.patient_ids = [utils_lung.extract_pid_filename(p) for p in mhd_paths]

        self.nsamples = len(self.patient_ids)
        self.ct_data_path = ct_data_path
        self.seg_data_path = seg_data_path
        self.rng = rng
        self.random = random
        self.infinite = infinite
        self.data_prep_fun = data_prep_fun
        self.transform_params = transform_params
        self.batch_size = batch_size
        self.full_batch = full_batch

    def generate(self):
        """Yield ``(x_batch, y_batch, patients_ids)`` batches.

        Both arrays have shape ``(nb, 1) + patch_size`` and dtype float32.
        With ``full_batch`` set, a final batch smaller than ``batch_size``
        is built but not yielded. Passes repeat while ``self.infinite``.
        """
        while True:
            rand_idxs = np.arange(self.nsamples)
            if self.random:
                self.rng.shuffle(rand_idxs)

            for pos in range(0, len(rand_idxs), self.batch_size):
                idxs_batch = rand_idxs[pos:pos + self.batch_size]
                nb = len(idxs_batch)
                # allocate batches
                x_batch = np.zeros((nb, 1) + self.transform_params['patch_size'], dtype='float32')
                y_batch = np.zeros((nb, 1) + self.transform_params['patch_size'], dtype='float32')
                patients_ids = []

                for i, idx in enumerate(idxs_batch):
                    pid = self.patient_ids[idx]
                    ct_path = self.ct_data_path + pid + '.mhd'
                    seg_path = self.seg_data_path + pid + '.mhd'
                    patients_ids.append(pid)

                    ct_img, ct_origin, ct_pixel_spacing = utils_lung.read_mhd(ct_path)
                    seg_img, seg_origin, seg_pixel_spacing = utils_lung.read_mhd(seg_path)

                    # Compare absolute differences so that offsets of
                    # opposite sign cannot cancel or go negative.
                    assert np.sum(np.abs(ct_origin - seg_origin)) < 1e-9
                    assert np.sum(np.abs(ct_pixel_spacing - seg_pixel_spacing)) < 1e-9

                    print('%s %s' % ('ct_img.shape', ct_img.shape))
                    print('%s %s' % ('seg_img.shape', seg_img.shape))

                    w, h, d = self.transform_params['patch_size']
                    # Keep the centre at least half a patch away from each
                    # border; the third coordinate uses axis 2 (the original
                    # sampled it from axis 1).
                    patch_center = [self.rng.randint(w // 2, ct_img.shape[0] - w // 2),
                                    self.rng.randint(h // 2, ct_img.shape[1] - h // 2),
                                    self.rng.randint(d // 2, ct_img.shape[2] - d // 2)]
                    print(patch_center)

                    x_patch, y_patch = self.data_prep_fun(ct_img=ct_img, seg_img=seg_img,
                                                          patch_center=patch_center,
                                                          pixel_spacing=ct_pixel_spacing,
                                                          luna_origin=ct_origin)
                    x_batch[i, 0, :, :, :] = x_patch
                    y_batch[i, 0, :, :, :] = y_patch

                if nb == self.batch_size or not self.full_batch:
                    yield x_batch, y_batch, patients_ids

            if not self.infinite:
                break
# Originally marked "works, tested".
class LunaScanDataGenerator(object):
    """Yield whole CT scans together with their segmentation volumes.

    Files are read from ``ct_data_path + pid + '.mhd'`` and
    ``seg_data_path + pid + '.mhd'``; no separator is inserted, so both
    prefixes presumably end with a path separator (confirm against callers).
    """

    def __init__(self, ct_data_path, seg_data_path, patient_ids=None, **kwargs):
        if patient_ids:
            self.patient_ids = patient_ids
        else:
            # The original read a never-assigned `self.patient_paths` here;
            # use the directory listing, keeping the '.mhd' entries as the
            # other generators do.
            patient_paths = utils_lung.get_patient_data_paths(ct_data_path)
            mhd_paths = [p for p in patient_paths if '.mhd' in p]
            self.patient_ids = [utils_lung.extract_pid_filename(p) for p in mhd_paths]

        self.nsamples = len(self.patient_ids)
        self.ct_data_path = ct_data_path
        self.seg_data_path = seg_data_path

    def generate(self):
        """Yield ``(ct_img, seg_img, pid)`` for every patient id, in order.

        Asserts that both volumes share origin and pixel spacing.
        """
        for pid in self.patient_ids:
            ct_path = self.ct_data_path + pid + '.mhd'
            seg_path = self.seg_data_path + pid + '.mhd'

            ct_img, ct_origin, ct_pixel_spacing = utils_lung.read_mhd(ct_path)
            seg_img, seg_origin, seg_pixel_spacing = utils_lung.read_mhd(seg_path)

            # Compare absolute differences so that offsets of opposite sign
            # cannot cancel or go negative.
            assert np.sum(np.abs(ct_origin - seg_origin)) < 1e-9
            assert np.sum(np.abs(ct_pixel_spacing - seg_pixel_spacing)) < 1e-9

            print('%s %s' % ('ct_img.shape', ct_img.shape))
            print('%s %s' % ('seg_img.shape', seg_img.shape))

            yield ct_img, seg_img, pid
class PatchPositiveLunaDataGenerator(object):
    """Batch generator of patches centred on a randomly chosen annotation.

    Only patients whose id is a key of the annotation mapping loaded from
    ``pathfinder.LUNA_LABELS_PATH`` are kept.
    """

    def __init__(self, data_path, batch_size, transform_params, data_prep_fun, rng,
                 full_batch, random, infinite, patient_ids=None, **kwargs):
        self.id2annotations = utils_lung.read_luna_annotations(pathfinder.LUNA_LABELS_PATH)

        if patient_ids:
            self.patient_paths = [data_path + '/' + p + '.mhd' for p in patient_ids]
        else:
            patient_paths = utils_lung.get_patient_data_paths(data_path)
            self.patient_paths = [p for p in patient_paths if '.mhd' in p]

        patient_ids_all = [utils_lung.extract_pid_filename(p) for p in self.patient_paths]
        # Membership on the mapping itself: `.keys()` builds a fresh list
        # for every pid on Python 2, making this filter quadratic.
        annotated = self.id2annotations
        patient_ids_pos = [pid for pid in patient_ids_all if pid in annotated]
        self.patient_paths = [data_path + '/' + p + '.mhd' for p in patient_ids_pos]

        self.nsamples = len(self.patient_paths)
        self.data_path = data_path
        self.rng = rng
        self.random = random
        self.infinite = infinite
        self.data_prep_fun = data_prep_fun
        self.transform_params = transform_params
        self.batch_size = batch_size
        self.full_batch = full_batch

    def generate(self):
        """Yield ``(x_batch, y_batch, patients_ids)`` batches.

        Both arrays have shape ``(nb, 1) + patch_size`` and dtype float32.
        With ``full_batch`` set, a final batch smaller than ``batch_size``
        is built but not yielded. Passes repeat while ``self.infinite``.
        """
        while True:
            rand_idxs = np.arange(self.nsamples)
            if self.random:
                self.rng.shuffle(rand_idxs)

            for pos in range(0, len(rand_idxs), self.batch_size):
                idxs_batch = rand_idxs[pos:pos + self.batch_size]
                nb = len(idxs_batch)
                # allocate batches
                x_batch = np.zeros((nb, 1) + self.transform_params['patch_size'], dtype='float32')
                y_batch = np.zeros((nb, 1) + self.transform_params['patch_size'], dtype='float32')
                patients_ids = []

                for i, idx in enumerate(idxs_batch):
                    patient_path = self.patient_paths[idx]
                    # `pid` rather than `id`, which shadowed the builtin.
                    pid = utils_lung.extract_pid_filename(patient_path)
                    patients_ids.append(pid)
                    img, origin, pixel_spacing = utils_lung.read_mhd(patient_path)

                    patient_annotations = self.id2annotations[pid]
                    patch_center = patient_annotations[self.rng.randint(len(patient_annotations))]
                    x_patch, y_patch = self.data_prep_fun(data=img,
                                                          patch_center=patch_center,
                                                          pixel_spacing=pixel_spacing,
                                                          luna_annotations=patient_annotations,
                                                          luna_origin=origin)
                    x_batch[i, 0, :, :, :] = x_patch
                    y_batch[i, 0, :, :, :] = y_patch

                if nb == self.batch_size or not self.full_batch:
                    yield x_batch, y_batch, patients_ids

            if not self.infinite:
                break
class ValidPatchPositiveLunaDataGenerator(object):
    """Yield one patch per positive annotation of the given patients.

    ``nsamples`` is the total number of annotations across the patients
    that appear in the annotation mapping.
    """

    def __init__(self, data_path, transform_params, patient_ids, data_prep_fun, **kwargs):
        all_annotations = utils_lung.read_luna_annotations(pathfinder.LUNA_LABELS_PATH)

        self.id2positive_annotations = {}
        self.id2patient_path = {}
        total = 0
        for pid in patient_ids:
            # Patients without annotations are skipped.
            if pid not in all_annotations:
                continue
            nodules = all_annotations[pid]
            self.id2positive_annotations[pid] = nodules
            self.id2patient_path[pid] = data_path + '/' + pid + '.mhd'
            total += len(nodules)

        self.nsamples = total
        self.data_path = data_path
        self.data_prep_fun = data_prep_fun
        self.transform_params = transform_params

    def generate(self):
        """Yield ``(x_batch, y_batch, [pid])`` for every annotation.

        Both arrays are cast to float32 and given two leading singleton
        axes. The scan is re-read from disk for each annotation.
        """
        for pid in self.id2positive_annotations:
            nodules = self.id2positive_annotations[pid]
            for patch_center in nodules:
                scan_path = self.id2patient_path[pid]
                img, origin, pixel_spacing = utils_lung.read_mhd(scan_path)
                x_patch, y_patch = self.data_prep_fun(
                    data=img,
                    patch_center=patch_center,
                    pixel_spacing=pixel_spacing,
                    luna_annotations=self.id2positive_annotations[pid],
                    luna_origin=origin)
                x_batch = np.float32(x_patch)[None, None, :, :, :]
                y_batch = np.float32(y_patch)[None, None, :, :, :]
                yield x_batch, y_batch, [pid]
class CandidatesLunaDataGenerator(object):
    """Batch generator of candidate patches with a binary label.

    NOTE: a second class with this same name is defined further down in
    this module and replaces this definition at import time.

    Only patients present in the positive-annotation mapping are kept. The
    file extension is '.pkl' when 'pkl' occurs in ``data_path``, else '.mhd'.
    """

    def __init__(self, data_path, batch_size, transform_params, patient_ids, data_prep_fun, rng,
                 full_batch, random, infinite, positive_proportion, **kwargs):
        all_positives = utils_lung.read_luna_annotations(pathfinder.LUNA_LABELS_PATH)
        all_negatives = utils_lung.read_luna_negative_candidates(pathfinder.LUNA_CANDIDATES_PATH)

        if 'pkl' in data_path:
            self.file_extension = '.pkl'
        else:
            self.file_extension = '.mhd'

        self.id2positive_annotations = {}
        self.id2negative_annotations = {}
        self.patient_paths = []
        n_positive = 0
        n_negative = 0
        for pid in patient_ids:
            if pid not in all_positives:
                continue
            self.id2positive_annotations[pid] = all_positives[pid]
            self.id2negative_annotations[pid] = all_negatives[pid]
            self.patient_paths.append(data_path + '/' + pid + self.file_extension)
            n_positive += len(all_positives[pid])
            n_negative += len(all_negatives[pid])

        print('%s %s' % ('n positive', n_positive))
        print('%s %s' % ('n negative', n_negative))

        self.nsamples = len(self.patient_paths)
        print('%s %s' % ('n patients', self.nsamples))

        self.data_path = data_path
        self.batch_size = batch_size
        self.rng = rng
        self.full_batch = full_batch
        self.random = random
        self.infinite = infinite
        self.data_prep_fun = data_prep_fun
        self.transform_params = transform_params
        self.positive_proportion = positive_proportion

    def generate(self):
        """Yield ``(x_batch, y_batch, patients_ids)`` batches.

        ``x_batch`` has shape ``(nb, 1) + patch_size`` and ``y_batch`` has
        shape ``(nb, 1)``. The first ``rint(batch_size * positive_proportion)``
        slots of a batch draw from positive annotations, the rest from
        negative candidates. The label is 1 when the last element of the
        chosen annotation is greater than zero.
        """
        while True:
            order = np.arange(self.nsamples)
            if self.random:
                self.rng.shuffle(order)

            for start in range(0, len(order), self.batch_size):
                batch_idxs = order[start:start + self.batch_size]
                nb = len(batch_idxs)
                x_batch = np.zeros((nb, 1) + self.transform_params['patch_size'], dtype='float32')
                y_batch = np.zeros((nb, 1), dtype='float32')
                patients_ids = []

                for i, idx in enumerate(batch_idxs):
                    patient_path = self.patient_paths[idx]
                    pid = utils_lung.extract_pid_filename(patient_path, self.file_extension)
                    patients_ids.append(pid)

                    if self.file_extension == '.pkl':
                        img, origin, pixel_spacing = utils_lung.read_pkl(patient_path)
                    else:
                        img, origin, pixel_spacing = utils_lung.read_mhd(patient_path)

                    if i < np.rint(self.batch_size * self.positive_proportion):
                        pool = self.id2positive_annotations[pid]
                    else:
                        pool = self.id2negative_annotations[pid]
                    patch_center = pool[self.rng.randint(len(pool))]

                    y_batch[i] = float(patch_center[-1] > 0)
                    x_batch[i, 0, :, :, :] = self.data_prep_fun(data=img,
                                                                patch_center=patch_center,
                                                                pixel_spacing=pixel_spacing,
                                                                luna_origin=origin)

                if nb == self.batch_size or not self.full_batch:
                    yield x_batch, y_batch, patients_ids

            if not self.infinite:
                break
class CandidatesLunaDataGenerator(object):
    """Batch generator of candidate patches with binary or malignancy labels.

    Only patients present in the positive-annotation mapping are kept. The
    file extension is '.pkl' when 'pkl' occurs in ``data_path``, else '.mhd'.
    With ``return_malignancy`` the label is ``diameter_to_prob`` applied to
    the last element of the chosen annotation; otherwise it is 1 when that
    element is greater than zero.
    """

    def __init__(self, data_path, batch_size, transform_params, patient_ids, data_prep_fun, rng,
                 full_batch, random, infinite, positive_proportion, return_malignancy=False, **kwargs):
        all_positives = utils_lung.read_luna_annotations(pathfinder.LUNA_LABELS_PATH)
        all_negatives = utils_lung.read_luna_negative_candidates(pathfinder.LUNA_CANDIDATES_PATH)

        if 'pkl' in data_path:
            self.file_extension = '.pkl'
        else:
            self.file_extension = '.mhd'

        self.id2positive_annotations = {}
        self.id2negative_annotations = {}
        self.patient_paths = []
        n_positive = 0
        n_negative = 0
        for pid in patient_ids:
            if pid not in all_positives:
                continue
            self.id2positive_annotations[pid] = all_positives[pid]
            self.id2negative_annotations[pid] = all_negatives[pid]
            self.patient_paths.append(data_path + '/' + pid + self.file_extension)
            n_positive += len(all_positives[pid])
            n_negative += len(all_negatives[pid])

        print('%s %s' % ('n positive', n_positive))
        print('%s %s' % ('n negative', n_negative))

        self.nsamples = len(self.patient_paths)
        print('%s %s' % ('n patients', self.nsamples))

        self.data_path = data_path
        self.batch_size = batch_size
        self.rng = rng
        self.full_batch = full_batch
        self.random = random
        self.infinite = infinite
        self.data_prep_fun = data_prep_fun
        self.transform_params = transform_params
        self.positive_proportion = positive_proportion
        self.return_malignancy = return_malignancy

    def generate(self):
        """Yield ``(x_batch, y_batch, patients_ids)`` batches.

        ``x_batch`` has shape ``(nb,) + patch_size`` and ``y_batch`` has
        shape ``(nb,)``. The first ``rint(batch_size * positive_proportion)``
        slots of a batch draw from positive annotations, the rest from
        negative candidates.
        """
        while True:
            order = np.arange(self.nsamples)
            if self.random:
                self.rng.shuffle(order)

            for start in range(0, len(order), self.batch_size):
                batch_idxs = order[start:start + self.batch_size]
                nb = len(batch_idxs)
                x_batch = np.zeros((nb,) + self.transform_params['patch_size'], dtype='float32')
                y_batch = np.zeros((nb,), dtype='float32')
                patients_ids = []

                for i, idx in enumerate(batch_idxs):
                    patient_path = self.patient_paths[idx]
                    pid = utils_lung.extract_pid_filename(patient_path, self.file_extension)
                    patients_ids.append(pid)

                    if self.file_extension == '.pkl':
                        img, origin, pixel_spacing = utils_lung.read_pkl(patient_path)
                    else:
                        img, origin, pixel_spacing = utils_lung.read_mhd(patient_path)

                    if i < np.rint(self.batch_size * self.positive_proportion):
                        pool = self.id2positive_annotations[pid]
                    else:
                        pool = self.id2negative_annotations[pid]
                    patch_center = pool[self.rng.randint(len(pool))]

                    if self.return_malignancy:
                        y_batch[i] = np.float32(diameter_to_prob(patch_center[-1]))
                    else:
                        y_batch[i] = float(patch_center[-1] > 0)
                    x_batch[i, :, :, :] = self.data_prep_fun(data=img,
                                                             patch_center=patch_center,
                                                             pixel_spacing=pixel_spacing,
                                                             luna_origin=origin)

                if nb == self.batch_size or not self.full_batch:
                    yield x_batch, y_batch, patients_ids

            if not self.infinite:
                break
class CandidatesLunaValidDataGenerator(object):
    """Yield every positive annotation plus an equal number of negatives.

    For each kept patient, as many negative candidates as there are
    positive annotations are drawn without replacement using a
    ``RandomState`` seeded with 42, so the selection is reproducible.
    """

    def __init__(self, data_path, transform_params, patient_ids, data_prep_fun, return_malignancy=False, **kwargs):
        rng = np.random.RandomState(42)  # do not change this!!!

        all_positives = utils_lung.read_luna_annotations(pathfinder.LUNA_LABELS_PATH)
        all_negatives = utils_lung.read_luna_negative_candidates(pathfinder.LUNA_CANDIDATES_PATH)

        if 'pkl' in data_path:
            self.file_extension = '.pkl'
        else:
            self.file_extension = '.mhd'

        self.id2positive_annotations = {}
        self.id2negative_annotations = {}
        self.id2patient_path = {}
        n_positive = 0
        n_negative = 0
        for pid in patient_ids:
            if pid not in all_positives:
                continue
            self.id2positive_annotations[pid] = all_positives[pid]
            negatives = all_negatives[pid]
            n_pos = len(all_positives[pid])
            n_neg = len(all_negatives[pid])
            picked = rng.choice(n_neg, size=n_pos, replace=False)
            self.id2negative_annotations[pid] = [negatives[j] for j in picked]
            self.id2patient_path[pid] = data_path + '/' + pid + self.file_extension
            # One negative is selected per positive, so both totals grow by n_pos.
            n_positive += n_pos
            n_negative += n_pos

        print('%s %s' % ('n positive', n_positive))
        print('%s %s' % ('n negative', n_negative))

        self.nsamples = len(self.id2patient_path)
        self.data_path = data_path
        self.rng = rng
        self.data_prep_fun = data_prep_fun
        self.transform_params = transform_params
        self.return_malignancy = return_malignancy

    def _read_scan(self, pid):
        """Load the scan for ``pid`` with the reader matching the extension."""
        scan_path = self.id2patient_path[pid]
        if self.file_extension == '.pkl':
            return utils_lung.read_pkl(scan_path)
        return utils_lung.read_mhd(scan_path)

    def generate(self):
        """Yield ``(x_batch, y_batch, [pid])``, positives before negatives.

        ``x_batch`` gets one leading singleton axis; ``y_batch`` has shape
        ``(1,)``. Positive labels are 1, or ``diameter_to_prob`` of the last
        annotation element when ``return_malignancy`` is set; negative
        labels are 0. The scan is re-read for every patch.
        """
        for pid in self.id2positive_annotations:
            for patch_center in self.id2positive_annotations[pid]:
                img, origin, pixel_spacing = self._read_scan(pid)
                if self.return_malignancy:
                    y_batch = np.array([diameter_to_prob(patch_center[-1])], dtype='float32')
                else:
                    y_batch = np.array([1.], dtype='float32')
                patch = self.data_prep_fun(data=img,
                                           patch_center=patch_center,
                                           pixel_spacing=pixel_spacing,
                                           luna_origin=origin)
                x_batch = np.float32(patch)[None, :, :, :]
                yield x_batch, y_batch, [pid]

            for patch_center in self.id2negative_annotations[pid]:
                img, origin, pixel_spacing = self._read_scan(pid)
                y_batch = np.array([0.], dtype='float32')
                patch = self.data_prep_fun(data=img,
                                           patch_center=patch_center,
                                           pixel_spacing=pixel_spacing,
                                           luna_origin=origin)
                x_batch = np.float32(patch)[None, :, :, :]
                yield x_batch, y_batch, [pid]
class FixedCandidatesLunaDataGenerator(object):
    """Yield patches around candidates loaded from per-patient pickle files.

    ``id2candidates_path`` maps a patient id to the path of its candidate
    file. The scan extension is '.pkl' when 'pkl' occurs in ``data_path``,
    else '.mhd'. When ``top_n`` is given only the first ``top_n`` candidates
    of each patient are used.
    """

    def __init__(self, data_path, transform_params, id2candidates_path, data_prep_fun, top_n=None):
        if 'pkl' in data_path:
            self.file_extension = '.pkl'
        else:
            self.file_extension = '.mhd'

        self.id2candidates_path = id2candidates_path
        self.id2patient_path = {}
        for pid in id2candidates_path:
            self.id2patient_path[pid] = data_path + '/' + pid + self.file_extension

        self.nsamples = len(self.id2patient_path)
        self.data_path = data_path
        self.data_prep_fun = data_prep_fun
        self.transform_params = transform_params
        self.top_n = top_n

    def generate(self):
        """Yield ``(x_batch, y_batch, [pid])`` for every candidate.

        ``y_batch`` is the candidate itself as a float32 array; the patch
        centre is its first three elements. ``x_batch`` gets two leading
        singleton axes. The scan is read once per patient.
        """
        for pid in self.id2candidates_path:
            scan_path = self.id2patient_path[pid]
            print('%s %s' % ('PATIENT', pid))

            candidates = utils.load_pkl(self.id2candidates_path[pid])
            if self.top_n is not None:
                candidates = candidates[:self.top_n]
                print(candidates)
            print('%s %s' % ('n blobs', len(candidates)))

            if self.file_extension == '.pkl':
                img, origin, pixel_spacing = utils_lung.read_pkl(scan_path)
            else:
                img, origin, pixel_spacing = utils_lung.read_mhd(scan_path)

            for candidate in candidates:
                y_batch = np.array(candidate, dtype='float32')
                patch = self.data_prep_fun(data=img,
                                           patch_center=candidate[:3],
                                           pixel_spacing=pixel_spacing,
                                           luna_origin=origin)
                x_batch = np.float32(patch)[None, None, :, :, :]
                yield x_batch, y_batch, [pid]
class CandidatesLunaSizeDataGenerator(object):
    """Batch generator of candidate patches labelled with a size value.

    The label is the last element of the chosen annotation, cast to float.
    Only patients present in the positive-annotation mapping are kept. The
    file extension is '.pkl' when 'pkl' occurs in ``data_path``, else '.mhd'.
    """

    def __init__(self, data_path, batch_size, transform_params, patient_ids, data_prep_fun, rng,
                 full_batch, random, infinite, positive_proportion, **kwargs):
        all_positives = utils_lung.read_luna_annotations(pathfinder.LUNA_LABELS_PATH)
        all_negatives = utils_lung.read_luna_negative_candidates(pathfinder.LUNA_CANDIDATES_PATH)

        if 'pkl' in data_path:
            self.file_extension = '.pkl'
        else:
            self.file_extension = '.mhd'

        self.id2positive_annotations = {}
        self.id2negative_annotations = {}
        self.patient_paths = []
        n_positive = 0
        n_negative = 0
        for pid in patient_ids:
            if pid not in all_positives:
                continue
            self.id2positive_annotations[pid] = all_positives[pid]
            self.id2negative_annotations[pid] = all_negatives[pid]
            self.patient_paths.append(data_path + '/' + pid + self.file_extension)
            n_positive += len(all_positives[pid])
            n_negative += len(all_negatives[pid])

        print('%s %s' % ('n positive', n_positive))
        print('%s %s' % ('n negative', n_negative))

        self.nsamples = len(self.patient_paths)
        print('%s %s' % ('n patients', self.nsamples))

        self.data_path = data_path
        self.batch_size = batch_size
        self.rng = rng
        self.full_batch = full_batch
        self.random = random
        self.infinite = infinite
        self.data_prep_fun = data_prep_fun
        self.transform_params = transform_params
        self.positive_proportion = positive_proportion

    def generate(self):
        """Yield ``(x_batch, y_batch, patients_ids)`` batches.

        ``x_batch`` has shape ``(nb, 1) + patch_size`` and ``y_batch`` has
        shape ``(nb, 1)``. The first ``rint(batch_size * positive_proportion)``
        slots of a batch draw from positive annotations, the rest from
        negative candidates.
        """
        while True:
            order = np.arange(self.nsamples)
            if self.random:
                self.rng.shuffle(order)

            for start in range(0, len(order), self.batch_size):
                batch_idxs = order[start:start + self.batch_size]
                nb = len(batch_idxs)
                x_batch = np.zeros((nb, 1) + self.transform_params['patch_size'], dtype='float32')
                y_batch = np.zeros((nb, 1), dtype='float32')
                patients_ids = []

                for i, idx in enumerate(batch_idxs):
                    patient_path = self.patient_paths[idx]
                    pid = utils_lung.extract_pid_filename(patient_path, self.file_extension)
                    patients_ids.append(pid)

                    if self.file_extension == '.pkl':
                        img, origin, pixel_spacing = utils_lung.read_pkl(patient_path)
                    else:
                        img, origin, pixel_spacing = utils_lung.read_mhd(patient_path)

                    if i < np.rint(self.batch_size * self.positive_proportion):
                        pool = self.id2positive_annotations[pid]
                    else:
                        pool = self.id2negative_annotations[pid]
                    patch_center = pool[self.rng.randint(len(pool))]

                    y_batch[i] = float(patch_center[-1])
                    x_batch[i, 0, :, :, :] = self.data_prep_fun(data=img,
                                                                patch_center=patch_center,
                                                                pixel_spacing=pixel_spacing,
                                                                luna_origin=origin)

                if nb == self.batch_size or not self.full_batch:
                    yield x_batch, y_batch, patients_ids

            if not self.infinite:
                break
class CandidatesLunaSizeValidDataGenerator(object):
    """Yield every positive annotation plus an equal number of negatives,
    labelled with a size value.

    For each kept patient, as many negative candidates as there are
    positive annotations are drawn without replacement using a
    ``RandomState`` seeded with 42, so the selection is reproducible.
    """

    def __init__(self, data_path, transform_params, patient_ids, data_prep_fun, **kwargs):
        rng = np.random.RandomState(42)  # do not change this!!!

        all_positives = utils_lung.read_luna_annotations(pathfinder.LUNA_LABELS_PATH)
        all_negatives = utils_lung.read_luna_negative_candidates(pathfinder.LUNA_CANDIDATES_PATH)

        if 'pkl' in data_path:
            self.file_extension = '.pkl'
        else:
            self.file_extension = '.mhd'

        self.id2positive_annotations = {}
        self.id2negative_annotations = {}
        self.id2patient_path = {}
        n_positive = 0
        n_negative = 0
        for pid in patient_ids:
            if pid not in all_positives:
                continue
            self.id2positive_annotations[pid] = all_positives[pid]
            negatives = all_negatives[pid]
            n_pos = len(all_positives[pid])
            n_neg = len(all_negatives[pid])
            picked = rng.choice(n_neg, size=n_pos, replace=False)
            self.id2negative_annotations[pid] = [negatives[j] for j in picked]
            self.id2patient_path[pid] = data_path + '/' + pid + self.file_extension
            # One negative is selected per positive, so both totals grow by n_pos.
            n_positive += n_pos
            n_negative += n_pos

        print('%s %s' % ('n positive', n_positive))
        print('%s %s' % ('n negative', n_negative))

        self.nsamples = len(self.id2patient_path)
        self.data_path = data_path
        self.rng = rng
        self.data_prep_fun = data_prep_fun
        self.transform_params = transform_params

    def _read_scan(self, pid):
        """Load the scan for ``pid`` with the reader matching the extension."""
        scan_path = self.id2patient_path[pid]
        if self.file_extension == '.pkl':
            return utils_lung.read_pkl(scan_path)
        return utils_lung.read_mhd(scan_path)

    def generate(self):
        """Yield ``(x_batch, y_batch, [pid])``, positives before negatives.

        ``x_batch`` gets two leading singleton axes; ``y_batch`` has shape
        ``(1, 1)``. Positive labels are the last annotation element cast to
        float; negative labels are 0. The scan is re-read for every patch.
        """
        for pid in self.id2positive_annotations:
            for patch_center in self.id2positive_annotations[pid]:
                img, origin, pixel_spacing = self._read_scan(pid)
                y_batch = np.array([[float(patch_center[-1])]], dtype='float32')
                patch = self.data_prep_fun(data=img,
                                           patch_center=patch_center,
                                           pixel_spacing=pixel_spacing,
                                           luna_origin=origin)
                x_batch = np.float32(patch)[None, None, :, :, :]
                yield x_batch, y_batch, [pid]

            for patch_center in self.id2negative_annotations[pid]:
                img, origin, pixel_spacing = self._read_scan(pid)
                y_batch = np.array([[0.]], dtype='float32')
                patch = self.data_prep_fun(data=img,
                                           patch_center=patch_center,
                                           pixel_spacing=pixel_spacing,
                                           luna_origin=origin)
                x_batch = np.float32(patch)[None, None, :, :, :]
                yield x_batch, y_batch, [pid]
class CandidatesLunaSizeBinDataGenerator(object):
    """Batch generator of candidate patches labelled with a size bin.

    Annotations whose last element (the diameter) is not greater than zero
    get label 0; all others get ``1 + bin index``. Only patients present in
    the positive-annotation mapping are kept. The file extension is '.pkl'
    when 'pkl' occurs in ``data_path``, else '.mhd'.

    ``bin_borders`` defaults to ``[4, 8, 20, 50]`` when omitted or None.
    """

    def __init__(self, data_path, batch_size, transform_params, patient_ids, data_prep_fun, rng,
                 full_batch, random, infinite, positive_proportion, bin_borders=None, **kwargs):
        id2positive_annotations = utils_lung.read_luna_annotations(pathfinder.LUNA_LABELS_PATH)
        id2negative_annotations = utils_lung.read_luna_negative_candidates(pathfinder.LUNA_CANDIDATES_PATH)

        self.file_extension = '.pkl' if 'pkl' in data_path else '.mhd'
        self.id2positive_annotations = {}
        self.id2negative_annotations = {}
        self.patient_paths = []
        n_positive, n_negative = 0, 0
        for pid in patient_ids:
            if pid in id2positive_annotations:
                self.id2positive_annotations[pid] = id2positive_annotations[pid]
                self.id2negative_annotations[pid] = id2negative_annotations[pid]
                self.patient_paths.append(data_path + '/' + pid + self.file_extension)
                n_positive += len(id2positive_annotations[pid])
                n_negative += len(id2negative_annotations[pid])

        print('%s %s' % ('n positive', n_positive))
        print('%s %s' % ('n negative', n_negative))

        self.nsamples = len(self.patient_paths)
        print('%s %s' % ('n patients', self.nsamples))

        self.data_path = data_path
        self.batch_size = batch_size
        self.rng = rng
        self.full_batch = full_batch
        self.random = random
        self.infinite = infinite
        self.data_prep_fun = data_prep_fun
        self.transform_params = transform_params
        self.positive_proportion = positive_proportion
        # A fresh list per instance instead of a shared mutable default.
        self.bin_borders = [4, 8, 20, 50] if bin_borders is None else bin_borders

    @staticmethod
    def _diameter_to_bin(diameter, bin_borders):
        """Return the index of the first border greater than ``diameter``.

        A diameter at or above every border is clamped to the last bin; it
        previously fell through to bin 0, the same bin as the smallest
        nodules. With no borders the result is 0.
        """
        for bin_idx, border in enumerate(bin_borders):
            if diameter < border:
                return bin_idx
        return max(len(bin_borders) - 1, 0)

    def generate(self):
        """Yield ``(x_batch, y_batch, patients_ids)`` batches.

        ``x_batch`` has shape ``(nb,) + patch_size`` and ``y_batch`` has
        shape ``(nb,)``. The first ``rint(batch_size * positive_proportion)``
        slots of a batch draw from positive annotations, the rest from
        negative candidates.
        """
        while True:
            rand_idxs = np.arange(self.nsamples)
            if self.random:
                self.rng.shuffle(rand_idxs)

            for pos in range(0, len(rand_idxs), self.batch_size):
                idxs_batch = rand_idxs[pos:pos + self.batch_size]
                nb = len(idxs_batch)
                # allocate batches
                x_batch = np.zeros((nb,) + self.transform_params['patch_size'], dtype='float32')
                y_batch = np.zeros((nb,), dtype='float32')
                patients_ids = []

                for i, idx in enumerate(idxs_batch):
                    patient_path = self.patient_paths[idx]
                    pid = utils_lung.extract_pid_filename(patient_path, self.file_extension)
                    patients_ids.append(pid)

                    if self.file_extension == '.pkl':
                        img, origin, pixel_spacing = utils_lung.read_pkl(patient_path)
                    else:
                        img, origin, pixel_spacing = utils_lung.read_mhd(patient_path)

                    if i < np.rint(self.batch_size * self.positive_proportion):
                        patient_annotations = self.id2positive_annotations[pid]
                    else:
                        patient_annotations = self.id2negative_annotations[pid]

                    patch_center = patient_annotations[self.rng.randint(len(patient_annotations))]
                    diameter = patch_center[-1]
                    if diameter > 0.:
                        y_batch[i] = 1. + self._diameter_to_bin(diameter, self.bin_borders)
                    else:
                        y_batch[i] = 0.

                    x_batch[i, :, :, :] = self.data_prep_fun(data=img,
                                                             patch_center=patch_center,
                                                             pixel_spacing=pixel_spacing,
                                                             luna_origin=origin)

                if nb == self.batch_size or not self.full_batch:
                    yield x_batch, y_batch, patients_ids

            if not self.infinite:
                break
class CandidatesLunaSizeBinValidDataGenerator(object):
    """Yield every positive annotation plus an equal number of negatives,
    labelled with a size bin.

    Positives get label ``1 + bin index`` computed from element 3 of the
    annotation (the diameter); negatives get label 0. For each kept
    patient, as many negative candidates as there are positive annotations
    are drawn without replacement using a ``RandomState`` seeded with 42.

    ``bin_borders`` defaults to ``[4, 8, 20, 50]`` when omitted or None.
    """

    def __init__(self, data_path, transform_params, patient_ids, data_prep_fun, bin_borders=None, **kwargs):
        rng = np.random.RandomState(42)  # do not change this!!!

        id2positive_annotations = utils_lung.read_luna_annotations(pathfinder.LUNA_LABELS_PATH)
        id2negative_annotations = utils_lung.read_luna_negative_candidates(pathfinder.LUNA_CANDIDATES_PATH)

        self.file_extension = '.pkl' if 'pkl' in data_path else '.mhd'
        self.id2positive_annotations = {}
        self.id2negative_annotations = {}
        self.id2patient_path = {}
        n_positive, n_negative = 0, 0
        for pid in patient_ids:
            if pid in id2positive_annotations:
                self.id2positive_annotations[pid] = id2positive_annotations[pid]
                negative_annotations = id2negative_annotations[pid]
                n_pos = len(id2positive_annotations[pid])
                n_neg = len(id2negative_annotations[pid])
                neg_idxs = rng.choice(n_neg, size=n_pos, replace=False)
                self.id2negative_annotations[pid] = [negative_annotations[i] for i in neg_idxs]
                self.id2patient_path[pid] = data_path + '/' + pid + self.file_extension
                # One negative is selected per positive, so both totals grow by n_pos.
                n_positive += n_pos
                n_negative += n_pos

        print('%s %s' % ('n positive', n_positive))
        print('%s %s' % ('n negative', n_negative))

        self.nsamples = len(self.id2patient_path)
        self.data_path = data_path
        self.rng = rng
        self.data_prep_fun = data_prep_fun
        self.transform_params = transform_params
        # A fresh list per instance instead of a shared mutable default.
        self.bin_borders = [4, 8, 20, 50] if bin_borders is None else bin_borders

    @staticmethod
    def _diameter_to_bin(diameter, bin_borders):
        """Return the index of the first border greater than ``diameter``.

        A diameter at or above every border is clamped to the last bin; it
        previously fell through to bin 0, the same bin as the smallest
        nodules. With no borders the result is 0.
        """
        for bin_idx, border in enumerate(bin_borders):
            if diameter < border:
                return bin_idx
        return max(len(bin_borders) - 1, 0)

    def _read_scan(self, pid):
        """Load the scan for ``pid`` with the reader matching the extension."""
        patient_path = self.id2patient_path[pid]
        if self.file_extension == '.pkl':
            return utils_lung.read_pkl(patient_path)
        return utils_lung.read_mhd(patient_path)

    def generate(self):
        """Yield ``(x_batch, y_batch, [pid])``, positives before negatives.

        ``x_batch`` gets one leading singleton axis; ``y_batch`` has shape
        ``(1,)``. The scan is re-read for every patch.
        """
        for pid in self.id2positive_annotations:
            for patch_center in self.id2positive_annotations[pid]:
                img, origin, pixel_spacing = self._read_scan(pid)
                ybin = self._diameter_to_bin(patch_center[3], self.bin_borders)
                y_batch = np.array([1. + ybin], dtype='float32')
                x_batch = np.float32(self.data_prep_fun(data=img,
                                                        patch_center=patch_center,
                                                        pixel_spacing=pixel_spacing,
                                                        luna_origin=origin))[None, :, :, :]
                yield x_batch, y_batch, [pid]

            for patch_center in self.id2negative_annotations[pid]:
                img, origin, pixel_spacing = self._read_scan(pid)
                y_batch = np.array([0.], dtype='float32')
                x_batch = np.float32(self.data_prep_fun(data=img,
                                                        patch_center=patch_center,
                                                        pixel_spacing=pixel_spacing,
                                                        luna_origin=origin))[None, :, :, :]
                yield x_batch, y_batch, [pid]
class CandidatesLunaPropsDataGenerator(object):
    """Training generator of candidate patches with per-property targets.

    Patients are split into those with positive annotations (``pos_pids``)
    and those with only negative candidates (``neg_pids``).  Each batch mixes
    ``round(batch_size * positive_proportion)`` patients from the first group
    with patients from the second group, one randomly chosen annotation per
    patient.  Targets are vectors ordered like ``order_objectives`` together
    with a 0/1 mask telling which entries were actually filled in.
    """

    def __init__(self, data_path, batch_size, transform_params, patient_ids, data_prep_fun, rng,
                 full_batch, random, infinite,
                 positive_proportion,
                 order_objectives,
                 property_type,
                 property_bin_borders=None,
                 return_enable_target_vector=False, **kwargs):
        """Read the annotation tables and index the requested patients.

        Args:
            data_path: directory with one ``<pid>.pkl`` / ``<pid>.mhd`` file
                per patient; ``.pkl`` is used when 'pkl' occurs in the path.
            batch_size: nominal number of samples per batch.
            transform_params: dict; ``'patch_size'`` gives the patch shape.
            patient_ids: ids to draw samples from.
            data_prep_fun: callable invoked with the keyword arguments
                ``data``, ``patch_center``, ``pixel_spacing``, ``luna_origin``.
            rng: random state providing ``shuffle``, ``randint``, ``choice``.
            full_batch: when true, batches smaller than ``batch_size`` are
                dropped.
            random: when true, patient order is shuffled every epoch.
            infinite: when false, ``generate`` stops after one epoch.
            positive_proportion: fraction of each batch taken from patients
                with positive annotations.
            order_objectives: property names fixing the target vector order.
            property_type: mapping property name -> type string, or a falsy
                value; ``'bounded_continuous'`` selects rescaling by 4.
            property_bin_borders: optional mapping property name -> bin
                borders for ``np.digitize``; ``None`` means no binning.
            return_enable_target_vector: accepted but unused.
            **kwargs: accepted and ignored.
        """
        id2positive_annotations = utils_lung.read_luna_annotations(pathfinder.LUNA_LABELS_PATH)
        id2negative_annotations = utils_lung.read_luna_negative_candidates(pathfinder.LUNA_CANDIDATES_PATH)

        self.file_extension = '.pkl' if 'pkl' in data_path else '.mhd'
        self.id2positive_annotations = {}
        self.id2negative_annotations = {}
        self.all_pids = patient_ids
        self.pos_pids = []
        self.neg_pids = []

        n_positive, n_negative = 0, 0
        for pid in patient_ids:
            if pid in id2positive_annotations:
                self.id2positive_annotations[pid] = id2positive_annotations[pid]
                self.id2negative_annotations[pid] = id2negative_annotations[pid]
                self.pos_pids.append(pid)
                n_positive += len(id2positive_annotations[pid])
                n_negative += len(id2negative_annotations[pid])
            elif pid in id2negative_annotations:
                self.id2negative_annotations[pid] = id2negative_annotations[pid]
                self.neg_pids.append(pid)
                n_negative += len(id2negative_annotations[pid])
            else:
                print('WARNING something weird happens')

        # Parenthesised %-formatting prints the same text under the Python 2
        # print statement and the Python 3 print function.
        print('n positive %s' % n_positive)
        print('n negative %s' % n_negative)

        self.n_neg_cans = n_negative
        self.n_pos_cans = n_positive
        self.n_pos_pids = len(self.pos_pids)
        self.n_neg_pids = len(self.neg_pids)
        self.nsamples = len(self.all_pids)
        print('n patients %s' % self.nsamples)

        self.data_path = data_path
        self.batch_size = batch_size
        self.rng = rng
        self.full_batch = full_batch
        self.random = random
        self.infinite = infinite
        self.data_prep_fun = data_prep_fun
        self.transform_params = transform_params
        self.positive_proportion = positive_proportion
        self.order_objectives = order_objectives
        self.property_bin_borders = property_bin_borders
        self.property_type = property_type

    def L2(self, a, b):
        """Return the Euclidean distance between the first three components."""
        return ((a[0] - b[0]) ** 2 + (a[1] - b[1]) ** 2 + (a[2] - b[2]) ** 2) ** (0.5)

    def build_ground_truth_vector(self, pid, patch_center):
        """Build the target vector and its enable mask for one candidate.

        Args:
            pid: patient id used to look up the doctors' annotations.
            patch_center: sequence whose first three entries are the
                candidate position and whose last entry is its diameter.

        Returns:
            ``(feature_vector, enable_target_vector)``: two float32 vectors
            ordered like ``order_objectives``.  The mask is 1 where a value
            was determined for that objective and 0 elsewhere.

        Raises:
            ValueError: if an objective has neither bin borders nor an entry
                point through ``property_type``.
        """
        # Treat the default ``None`` as "no binning" instead of failing on
        # the membership tests below.
        bin_borders = self.property_bin_borders or {}

        n_objectives = len(self.order_objectives)
        feature_vector = np.zeros((n_objectives,), dtype='float32')
        enable_target_vector = np.zeros((n_objectives,), dtype='float32')

        diameter = patch_center[-1]
        is_nodule = diameter > 0.01
        properties = {'nodule': np.float32(is_nodule)}

        if is_nodule:
            if 'size' in bin_borders:
                properties['size'] = np.digitize(diameter, bin_borders['size'])
            else:
                properties['size'] = diameter

            patient = utils_lung.read_patient_annotations_luna(pid, pathfinder.LUNA_NODULE_ANNOTATIONS_PATH)

            # Collect the characteristics of every annotated nodule whose
            # (reversed) centroid lies within distance 5 of the candidate.
            nodule_characteristics = []
            for doctor in patient:
                for nodule in doctor:
                    if "centroid_xyz" in nodule:
                        dist = self.L2(patch_center[:3], nodule["centroid_xyz"][::-1])
                        if dist < 5:
                            nodule_characteristics.append(nodule['characteristics'])

            if not nodule_characteristics:
                # Two spaces before the value reproduce the original output.
                print('WARNING: no nodule found in doctor annotations for  %s' % (patch_center,))
            else:
                for prop in nodule_characteristics[0]:
                    if prop not in self.order_objectives:
                        continue
                    prop_values = [float(nchar[prop]) for nchar in nodule_characteristics]
                    # Pick the value of one randomly chosen matching annotation.
                    random_value = self.rng.choice(np.array(prop_values))
                    if prop in bin_borders:
                        properties[prop] = np.digitize(random_value, bin_borders[prop])
                    elif self.property_type:
                        if self.property_type[prop] == 'bounded_continuous':
                            properties[prop] = (random_value - 1) / 4.
                        else:
                            properties[prop] = random_value - 1
                    else:
                        raise ValueError('no bin borders and no property type for %r' % (prop,))

        for idx, prop in enumerate(self.order_objectives):
            if prop in properties:
                feature_vector[idx] = properties[prop]
                enable_target_vector[idx] = 1.

        return feature_vector, enable_target_vector

    def generate(self):
        """Yield ``(x_batch, y_batch, z_batch, patients_ids)`` tuples.

        ``y_batch`` holds the target vectors and ``z_batch`` the matching
        enable masks.  Iteration is driven by the positive patients; once the
        negative patients run out, batches contain positives only.
        """
        while True:
            rand_pos_idxs = np.arange(self.n_pos_pids)
            rand_neg_idxs = np.arange(self.n_neg_pids)
            if self.random:
                self.rng.shuffle(rand_pos_idxs)
                self.rng.shuffle(rand_neg_idxs)

            n_pos_batch = int(np.rint(self.batch_size * self.positive_proportion))
            n_neg_batch = self.batch_size - n_pos_batch

            for batch_no, pos_start in enumerate(range(0, len(rand_pos_idxs), n_pos_batch)):
                pos_idxs_batch = rand_pos_idxs[pos_start:pos_start + n_pos_batch]
                neg_idxs_batch = rand_neg_idxs[batch_no * n_neg_batch:(batch_no + 1) * n_neg_batch]
                nb = len(pos_idxs_batch) + len(neg_idxs_batch)

                # allocate batches
                x_batch = np.zeros((nb,) + self.transform_params['patch_size'], dtype='float32')
                y_batch = np.zeros((nb, len(self.order_objectives)), dtype='float32')
                z_batch = np.zeros((nb, len(self.order_objectives)), dtype='float32')
                patients_ids = []

                # Positives first, then negatives; both groups are handled
                # identically apart from where ids and annotations come from.
                groups = ((self.pos_pids, self.id2positive_annotations, pos_idxs_batch),
                          (self.neg_pids, self.id2negative_annotations, neg_idxs_batch))
                batch_ptr = 0
                for pids, id2annotations, idxs in groups:
                    for idx in idxs:
                        pid = pids[idx]
                        patient_path = self.data_path + '/' + pid + self.file_extension
                        patients_ids.append(pid)

                        if self.file_extension == '.pkl':
                            img, origin, pixel_spacing = utils_lung.read_pkl(patient_path)
                        else:
                            img, origin, pixel_spacing = utils_lung.read_mhd(patient_path)

                        patient_annotations = id2annotations[pid]
                        patch_center = patient_annotations[self.rng.randint(len(patient_annotations))]

                        y_batch[batch_ptr], z_batch[batch_ptr] = self.build_ground_truth_vector(pid, patch_center)
                        x_batch[batch_ptr, :, :, :] = self.data_prep_fun(data=img,
                                                                         patch_center=patch_center,
                                                                         pixel_spacing=pixel_spacing,
                                                                         luna_origin=origin)
                        batch_ptr += 1

                if not self.full_batch or nb == self.batch_size:
                    yield x_batch, y_batch, z_batch, patients_ids

            if not self.infinite:
                break
class CandidatesLunaPropsValidDataGenerator(object):
    """Validation generator of single candidate patches with property targets.

    For every id in ``patient_ids`` that has positive annotations the
    constructor keeps all of them plus an equally sized random subset of the
    patient's negative candidates, drawn from a ``RandomState`` seeded with
    42 so that the selection is reproducible.  ``generate`` makes one pass
    and yields batches of size one.
    """

    def __init__(self, data_path, transform_params, patient_ids, data_prep_fun,
                 order_objectives, property_type, property_bin_borders=None, **kwargs):
        """Select positive annotations and the matching negative candidates.

        Args:
            data_path: directory with one ``<pid>.pkl`` / ``<pid>.mhd`` file
                per patient; ``.pkl`` is used when 'pkl' occurs in the path.
            transform_params: stored on the instance, not used by this class.
            patient_ids: ids to consider; ids without positive annotations
                are skipped.
            data_prep_fun: callable invoked with the keyword arguments
                ``data``, ``patch_center``, ``pixel_spacing``, ``luna_origin``.
            order_objectives: property names fixing the target vector order.
            property_type: mapping property name -> type string, or a falsy
                value; ``'bounded_continuous'`` selects rescaling by 4.
            property_bin_borders: optional mapping property name -> bin
                borders for ``np.digitize``; ``None`` means no binning.
            **kwargs: accepted and ignored.
        """
        rng = np.random.RandomState(42)  # do not change this!!!

        id2positive_annotations = utils_lung.read_luna_annotations(pathfinder.LUNA_LABELS_PATH)
        id2negative_annotations = utils_lung.read_luna_negative_candidates(pathfinder.LUNA_CANDIDATES_PATH)

        self.file_extension = '.pkl' if 'pkl' in data_path else '.mhd'
        self.id2positive_annotations = {}
        self.id2negative_annotations = {}
        self.id2patient_path = {}

        n_positive, n_negative = 0, 0
        for pid in patient_ids:
            if pid not in id2positive_annotations:
                continue
            positives = id2positive_annotations[pid]
            negatives = id2negative_annotations[pid]
            n_pos = len(positives)
            # One rng.choice call per patient, in patient_ids order: the
            # selected subset depends on this exact call sequence.
            neg_idxs = rng.choice(len(negatives), size=n_pos, replace=False)

            self.id2positive_annotations[pid] = positives
            self.id2negative_annotations[pid] = [negatives[i] for i in neg_idxs]
            self.id2patient_path[pid] = data_path + '/' + pid + self.file_extension
            n_positive += n_pos
            n_negative += n_pos  # exactly n_pos negatives were selected

        # Parenthesised %-formatting prints the same text under the Python 2
        # print statement and the Python 3 print function.
        print('n positive %s' % n_positive)
        print('n negative %s' % n_negative)

        self.nsamples = len(self.id2patient_path)
        self.data_path = data_path
        self.rng = rng
        self.data_prep_fun = data_prep_fun
        self.transform_params = transform_params
        self.order_objectives = order_objectives
        self.property_bin_borders = property_bin_borders
        self.property_type = property_type

    def L2(self, a, b):
        """Return the Euclidean distance between the first three components."""
        return ((a[0] - b[0]) ** 2 + (a[1] - b[1]) ** 2 + (a[2] - b[2]) ** 2) ** (0.5)

    def build_ground_truth_vector(self, pid, patch_center):
        """Build the target vector and its enable mask for one candidate.

        Binned properties use the median over the matching doctor
        annotations, all other properties use the mean.

        Args:
            pid: patient id used to look up the doctors' annotations.
            patch_center: sequence whose first three entries are the
                candidate position and whose last entry is its diameter.

        Returns:
            ``(feature_vector, enable_target_vector)``: two float32 vectors
            ordered like ``order_objectives``.  The mask is 1 where a value
            was determined for that objective and 0 elsewhere.

        Raises:
            ValueError: if an objective has neither bin borders nor an entry
                point through ``property_type``.
        """
        # Treat the default ``None`` as "no binning" instead of failing on
        # the membership tests below.
        bin_borders = self.property_bin_borders or {}

        n_objectives = len(self.order_objectives)
        feature_vector = np.zeros((n_objectives,), dtype='float32')
        enable_target_vector = np.zeros((n_objectives,), dtype='float32')

        diameter = patch_center[-1]
        is_nodule = diameter > 0.01
        properties = {'nodule': np.float32(is_nodule)}

        if is_nodule:
            if 'size' in bin_borders:
                properties['size'] = np.digitize(diameter, bin_borders['size'])
            else:
                properties['size'] = diameter

            patient = utils_lung.read_patient_annotations_luna(pid, pathfinder.LUNA_NODULE_ANNOTATIONS_PATH)

            # Collect the characteristics of every annotated nodule whose
            # (reversed) centroid lies within distance 5 of the candidate.
            nodule_characteristics = []
            for doctor in patient:
                for nodule in doctor:
                    if "centroid_xyz" in nodule:
                        dist = self.L2(patch_center[:3], nodule["centroid_xyz"][::-1])
                        if dist < 5:
                            nodule_characteristics.append(nodule['characteristics'])

            if not nodule_characteristics:
                # Two spaces before the value reproduce the original output.
                print('WARNING: no nodule found in doctor annotations for  %s' % (patch_center,))
            else:
                for prop in nodule_characteristics[0]:
                    if prop not in self.order_objectives:
                        continue
                    prop_values = np.array([float(nchar[prop]) for nchar in nodule_characteristics])
                    if prop in bin_borders:
                        properties[prop] = np.digitize(np.median(prop_values), bin_borders[prop])
                        continue
                    mean_value = np.mean(prop_values)
                    if not self.property_type:
                        raise ValueError('no bin borders and no property type for %r' % (prop,))
                    if self.property_type[prop] == 'bounded_continuous':
                        properties[prop] = (mean_value - 1) / 4.
                    else:
                        properties[prop] = mean_value - 1

        for idx, prop in enumerate(self.order_objectives):
            if prop in properties:
                feature_vector[idx] = properties[prop]
                enable_target_vector[idx] = 1.

        return feature_vector, enable_target_vector

    def generate(self):
        """Yield ``(x_batch, y_batch, z_batch, [pid])`` for each annotation.

        Per patient, all positive annotations come first, followed by the
        selected negative candidates.  Every array has a leading batch
        dimension of one.
        """
        for pid in self.id2positive_annotations:
            for id2annotations in (self.id2positive_annotations, self.id2negative_annotations):
                for patch_center in id2annotations[pid]:
                    patient_path = self.id2patient_path[pid]
                    if self.file_extension == '.pkl':
                        img, origin, pixel_spacing = utils_lung.read_pkl(patient_path)
                    else:
                        img, origin, pixel_spacing = utils_lung.read_mhd(patient_path)

                    x_batch = np.float32(self.data_prep_fun(data=img,
                                                            patch_center=patch_center,
                                                            pixel_spacing=pixel_spacing,
                                                            luna_origin=origin))[None, :, :, :]
                    feature_vector, enable_target_vector = self.build_ground_truth_vector(pid, patch_center)
                    y_batch = np.array([feature_vector], dtype='float32')
                    z_batch = np.array([enable_target_vector], dtype='float32')
                    yield x_batch, y_batch, z_batch, [pid]
class DSBScanDataGenerator(object):
    """Single-pass iterator over every patient scan found under ``data_path``."""

    def __init__(self, data_path, transform_params, data_prep_fun, **kwargs):
        """List the patient paths and remember the preparation function.

        ``data_prep_fun`` is later called with ``data`` and ``pixel_spacing``
        keyword arguments and must return ``(x, tf_matrix)``.  Extra keyword
        arguments are ignored.
        """
        scan_paths = utils_lung.get_patient_data_paths(data_path)
        self.data_path = data_path
        self.transform_params = transform_params
        self.data_prep_fun = data_prep_fun
        self.patient_paths = scan_paths
        self.nsamples = len(scan_paths)

    def generate(self):
        """Yield ``(x, None, tf_matrix, pid)`` for each patient.

        ``x`` is the prepared scan cast to float32 with two leading
        singleton axes added.
        """
        for scan_path in self.patient_paths:
            patient_id = utils_lung.extract_pid_dir(scan_path)
            scan, spacing = utils_lung.read_dicom_scan(scan_path)
            prepared, tf_matrix = self.data_prep_fun(data=scan, pixel_spacing=spacing)
            yield np.float32(prepared)[None, None, :, :, :], None, tf_matrix, patient_id
class DSBScanLungMaskDataGenerator(object):
    """Single-pass iterator yielding prepared scans together with lung masks."""

    def __init__(self, data_path, transform_params, data_prep_fun, exclude_pids=None,
                 include_pids=None, part_out_of=(1, 1)):
        """Choose which patient paths to iterate over.

        The listing of ``data_path`` is cut into ``part_out_of[1]`` chunks of
        equal length and chunk number ``part_out_of[0]`` (1-based) is kept;
        the last chunk also takes the remainder.  For each entry of
        ``exclude_pids`` the first path containing it is removed.  If
        ``include_pids`` is given it replaces the whole selection.
        """
        selected = utils_lung.get_patient_data_paths(data_path)

        part_no = part_out_of[0]
        n_parts = part_out_of[1]
        chunk_len = int(len(selected) / n_parts)
        first = chunk_len * (part_no - 1)
        # The final part is open-ended so that leftover paths are not lost.
        last = None if part_no == n_parts else chunk_len * part_no
        selected = selected[first:last]

        if exclude_pids is not None:
            for excluded in exclude_pids:
                for position, path in enumerate(selected):
                    if excluded in path:
                        # Only the first match is dropped; leaving the loop
                        # right away keeps the in-place removal safe.
                        selected.pop(position)
                        break

        if include_pids is not None:
            selected = [data_path + '/' + pid for pid in include_pids]

        self.patient_paths = selected
        self.nsamples = len(selected)
        self.data_path = data_path
        self.data_prep_fun = data_prep_fun
        self.transform_params = transform_params

    def generate(self):
        """Yield ``(x, lung_mask, tf_matrix, pid)`` for each selected patient.

        Both arrays are cast to float32 and get two leading singleton axes.
        """
        for scan_path in self.patient_paths:
            patient_id = utils_lung.extract_pid_dir(scan_path)
            scan, spacing = utils_lung.read_dicom_scan(scan_path)
            prepared, mask, tf_matrix = self.data_prep_fun(data=scan, pixel_spacing=spacing)
            prepared = np.float32(prepared)[None, None, :, :, :]
            mask = np.float32(mask)[None, None, :, :, :]
            yield prepared, mask, tf_matrix, patient_id
class CandidatesDSBDataGenerator(object):
    """Single-pass iterator over the candidate patches of every patient."""

    def __init__(self, data_path, transform_params, id2candidates_path, data_prep_fun, exclude_pids=None):
        """Index the patients that have a candidates file.

        Args:
            data_path: directory containing one scan per patient id.
            transform_params: stored on the instance, not used by this class.
            id2candidates_path: mapping patient id -> path of the pickled
                candidates.  It is no longer modified; exclusions are applied
                to a private copy.
            data_prep_fun: callable invoked with the keyword arguments
                ``data``, ``patch_center`` and ``pixel_spacing``.
            exclude_pids: optional ids to leave out; unknown ids are ignored.
        """
        if exclude_pids is not None:
            # Copy first so the caller's dict keeps all of its entries.
            id2candidates_path = id2candidates_path.copy()
            for excluded in exclude_pids:
                id2candidates_path.pop(excluded, None)

        self.id2candidates_path = id2candidates_path
        self.id2patient_path = dict((pid, data_path + '/' + pid) for pid in id2candidates_path)
        self.nsamples = len(self.id2patient_path)
        self.data_path = data_path
        self.data_prep_fun = data_prep_fun
        self.transform_params = transform_params

    def generate(self):
        """Yield ``(x_batch, y_batch, [pid])`` for every candidate.

        ``y_batch`` is the candidate row as float32; ``x_batch`` is the patch
        prepared around the row's first three entries, with a leading batch
        axis of one.
        """
        for pid in self.id2candidates_path:
            patient_path = self.id2patient_path[pid]
            # Parenthesised single-argument prints behave the same under the
            # Python 2 print statement and the Python 3 print function.
            print('%s %s' % (pid, patient_path))
            img, pixel_spacing = utils_lung.read_dicom_scan(patient_path)

            print(self.id2candidates_path[pid])
            candidates = utils.load_pkl(self.id2candidates_path[pid])
            print(candidates.shape)

            for candidate in candidates:
                y_batch = np.array(candidate, dtype='float32')
                patch_center = candidate[:3]
                x_batch = np.float32(self.data_prep_fun(data=img,
                                                        patch_center=patch_center,
                                                        pixel_spacing=pixel_spacing))[None, :, :, :]
                yield x_batch, y_batch, [pid]
class CandidatesDSBDataGeneratorTTA(object):
    """Single-pass candidate iterator that prepares every patch ``tta`` times."""

    def __init__(self, data_path, transform_params, id2candidates_path, data_prep_fun, exclude_pids=None, tta=64):
        """Index the patients that have a candidates file.

        Args:
            data_path: directory containing one scan per patient id.
            transform_params: stored on the instance, not used by this class.
            id2candidates_path: mapping patient id -> path of the pickled
                candidates.  It is no longer modified; exclusions are applied
                to a private copy.
            data_prep_fun: callable invoked with the keyword arguments
                ``data``, ``patch_center`` and ``pixel_spacing``.
            exclude_pids: optional ids to leave out; unknown ids are ignored.
            tta: number of times ``data_prep_fun`` is called per candidate.
        """
        if exclude_pids is not None:
            # Copy first so the caller's dict keeps all of its entries.
            id2candidates_path = id2candidates_path.copy()
            for excluded in exclude_pids:
                id2candidates_path.pop(excluded, None)

        self.id2candidates_path = id2candidates_path
        self.id2patient_path = dict((pid, data_path + '/' + pid) for pid in id2candidates_path)
        self.nsamples = len(self.id2patient_path)
        self.data_path = data_path
        self.data_prep_fun = data_prep_fun
        self.transform_params = transform_params
        self.tta = tta

    def generate(self):
        """Yield ``(x_batch, y_batch, [pid])`` for every candidate.

        ``x_batch`` stacks the ``tta`` float32 outputs of ``data_prep_fun``
        for the same patch centre along a new first axis; ``y_batch`` is the
        candidate row as float32.
        """
        for pid in self.id2candidates_path:
            patient_path = self.id2patient_path[pid]
            # Parenthesised single-argument prints behave the same under the
            # Python 2 print statement and the Python 3 print function.
            print('%s %s' % (pid, patient_path))
            img, pixel_spacing = utils_lung.read_dicom_scan(patient_path)

            print(self.id2candidates_path[pid])
            candidates = utils.load_pkl(self.id2candidates_path[pid])
            print(candidates.shape)

            for candidate in candidates:
                y_batch = np.array(candidate, dtype='float32')
                patch_center = candidate[:3]
                patches = [np.float32(self.data_prep_fun(data=img,
                                                         patch_center=patch_center,
                                                         pixel_spacing=pixel_spacing))
                           for _ in range(self.tta)]
                x_batch = np.stack(patches)
                print(x_batch.shape)
                yield x_batch, y_batch, [pid]
class DSBFeatureDataGenerator(object):
    """Batch generator over per-patient feature pickles and their labels."""

    def __init__(self, data_path, batch_size, p_features,
                 rng, random, infinite, patient_ids=None):
        """Read the label table and build one feature path per patient id.

        ``p_features`` must provide ``'output_shape'`` and may provide
        ``'reshape'`` and ``'swapaxes'``, applied in that order to each
        loaded feature array.  Raises ``ValueError`` when ``patient_ids`` is
        ``None``.
        """
        print('init DSBFeatureDataGenerator')
        self.id2label = utils_lung.read_labels(pathfinder.LABELS_PATH)

        if patient_ids is None:
            raise ValueError('provide patient ids')
        self.patient_paths = [data_path + '/' + pid for pid in patient_ids]

        self.nsamples = len(self.patient_paths)
        self.data_path = data_path
        self.batch_size = batch_size
        self.p_features = p_features
        self.rng = rng
        self.random = random
        self.infinite = infinite

    def generate(self):
        """Yield ``(x_batch, y_batch, pids_batch)`` for full batches only.

        A trailing batch with fewer than ``batch_size`` patients is still
        loaded but never yielded.
        """
        keep_going = True
        while keep_going:
            order = np.arange(self.nsamples)
            if self.random:
                self.rng.shuffle(order)

            for start in range(0, len(order), self.batch_size):
                chosen = order[start:start + self.batch_size]

                x_batch = np.zeros((self.batch_size,)
                                   + self.p_features['output_shape'], dtype='float32')
                y_batch = np.zeros((self.batch_size,), dtype='float32')
                pids_batch = []

                for row, sample_idx in enumerate(chosen):
                    feature_path = self.patient_paths[sample_idx]
                    patient_id = utils_lung.extract_pid_dir(feature_path)

                    features = utils.load_pkl(feature_path + '.pkl')
                    if 'reshape' in self.p_features:
                        features = np.reshape(features, self.p_features['reshape'])
                    if 'swapaxes' in self.p_features:
                        features = np.swapaxes(features, *self.p_features['swapaxes'])

                    x_batch[row] = features
                    y_batch[row] = self.id2label.get(patient_id)
                    pids_batch.append(patient_id)

                if len(chosen) == self.batch_size:
                    yield x_batch, y_batch, pids_batch

            keep_going = bool(self.infinite)
class DSBPatientsDataGenerator(object):
    """Batch generator yielding the top candidate patches of each patient."""

    def __init__(self, data_path, batch_size, transform_params, id2candidates_path, id2label, data_prep_fun,
                 n_candidates_per_patient, rng, random, infinite, candidates_prep_fun, return_patch_locs=False, shuffle_top_n=False, patient_ids=None):
        """Keep the patients that have a candidates file.

        ``patient_ids`` is mandatory despite its default; ``None`` raises
        ``ValueError``.  Ids missing from ``id2candidates_path`` are skipped.
        """
        if patient_ids is None:
            raise ValueError('provide patient ids')

        self.id2label = id2label
        self.id2candidates_path = id2candidates_path
        # TODO: the membership filter should be redundant if fpr and
        # segmentation are correctly generated
        self.patient_paths = [data_path + '/' + pid
                              for pid in patient_ids
                              if pid in id2candidates_path]
        self.nsamples = len(self.patient_paths)

        self.data_path = data_path
        self.batch_size = batch_size
        self.transform_params = transform_params
        self.data_prep_fun = data_prep_fun
        self.candidates_prep_fun = candidates_prep_fun
        self.n_candidates_per_patient = n_candidates_per_patient
        self.rng = rng
        self.random = random
        self.infinite = infinite
        self.shuffle_top_n = shuffle_top_n
        self.return_patch_locs = return_patch_locs

    def generate(self):
        """Yield full batches of patient patches and labels.

        Yields ``(x_batch, y_batch, pids_batch)``, or
        ``(x_batch, x_loc_batch, y_batch, pids_batch)`` when
        ``return_patch_locs`` is set.  A trailing batch with fewer than
        ``batch_size`` patients is processed but not yielded.
        """
        keep_going = True
        while keep_going:
            order = np.arange(self.nsamples)
            if self.random:
                self.rng.shuffle(order)

            for start in range(0, len(order), self.batch_size):
                chosen = order[start:start + self.batch_size]

                x_batch = np.zeros((self.batch_size, self.n_candidates_per_patient,)
                                   + self.transform_params['patch_size'], dtype='float32')
                if self.return_patch_locs:
                    x_loc_batch = np.zeros((self.batch_size, self.n_candidates_per_patient, 3), dtype='float32')
                y_batch = np.zeros((self.batch_size,), dtype='float32')
                pids_batch = []

                for row, sample_idx in enumerate(chosen):
                    scan_path = self.patient_paths[sample_idx]
                    patient_id = utils_lung.extract_pid_dir(scan_path)
                    scan, spacing = utils_lung.read_dicom_scan(scan_path)

                    candidates = utils.load_pkl(self.id2candidates_path[patient_id])
                    if self.candidates_prep_fun:
                        top = self.candidates_prep_fun(candidates, self.n_candidates_per_patient)
                    else:
                        top = candidates[:self.n_candidates_per_patient]
                    if self.shuffle_top_n:
                        self.rng.shuffle(top)

                    if self.return_patch_locs:
                        # TODO move the normalization to the config file
                        x_loc_batch[row] = np.float32(top[:, :3]) / 512.

                    patches = self.data_prep_fun(data=scan, pid=patient_id,
                                                 patch_centers=top,
                                                 pixel_spacing=spacing)
                    x_batch[row] = np.float32(patches)[:, :, :, :]
                    y_batch[row] = self.id2label.get(patient_id)
                    pids_batch.append(patient_id)

                if len(chosen) != self.batch_size:
                    continue
                if self.return_patch_locs:
                    yield x_batch, x_loc_batch, y_batch, pids_batch
                else:
                    yield x_batch, y_batch, pids_batch

            keep_going = bool(self.infinite)
class DSBPatientsDataGeneratorTTA(object):
    """Single-pass generator preparing each patient's patches ``tta`` times."""

    def __init__(self, data_path, transform_params, id2candidates_path, id2label, data_prep_fun, candidates_prep_fun,
                 n_candidates_per_patient, patient_ids, tta=1):
        """Keep the patients that have a candidates file.

        Raises ``ValueError`` when ``patient_ids`` is ``None``; ids missing
        from ``id2candidates_path`` are skipped.
        """
        if patient_ids is None:
            raise ValueError('provide patient ids')

        self.id2label = id2label
        self.id2candidates_path = id2candidates_path
        # TODO: the membership filter should be redundant if fpr and
        # segmentation are correctly generated
        self.patient_paths = [data_path + '/' + pid
                              for pid in patient_ids
                              if pid in id2candidates_path]
        self.nsamples = len(self.patient_paths)

        self.data_path = data_path
        self.transform_params = transform_params
        self.data_prep_fun = data_prep_fun
        self.candidates_prep_fun = candidates_prep_fun
        self.n_candidates_per_patient = n_candidates_per_patient
        self.tta = tta

    def generate(self):
        """Yield ``(x_batch, y_batch, pid)`` once per patient.

        Row ``k`` of ``x_batch`` is the ``k``-th call of ``data_prep_fun`` on
        the same top candidates; every row of ``y_batch`` holds the
        patient's label.
        """
        # Bare Python 2 print statement from the original: emits an empty line.
        print
        for scan_path in self.patient_paths:
            x_batch = np.zeros((self.tta, self.n_candidates_per_patient,)
                               + self.transform_params['patch_size'], dtype='float32')
            y_batch = np.zeros((self.tta,), dtype='float32')

            patient_id = utils_lung.extract_pid_dir(scan_path)
            scan, spacing = utils_lung.read_dicom_scan(scan_path)

            candidates = utils.load_pkl(self.id2candidates_path[patient_id])
            if self.candidates_prep_fun:
                top = self.candidates_prep_fun(candidates, self.n_candidates_per_patient)
            else:
                top = candidates[:self.n_candidates_per_patient]

            for repeat in range(self.tta):
                patches = self.data_prep_fun(data=scan,
                                             patch_centers=top,
                                             pixel_spacing=spacing)
                x_batch[repeat] = np.float32(patches)[:, :, :, :]
                y_batch[repeat] = self.id2label.get(patient_id)

            yield x_batch, y_batch, patient_id
class DSBPixelSpacingsGenerator(object):
    """Single-pass iterator reporting the pixel spacing of each patient scan."""

    def __init__(self, data_path, id2candidates_path, patient_ids):
        """Keep the patients that have a candidates file.

        Raises ``ValueError`` when ``patient_ids`` is ``None``; ids missing
        from ``id2candidates_path`` are skipped.
        """
        if patient_ids is None:
            raise ValueError('provide patient ids')

        self.id2candidates_path = id2candidates_path
        # TODO: the membership filter should be redundant if fpr and
        # segmentation are correctly generated
        self.patient_paths = [data_path + '/' + pid
                              for pid in patient_ids
                              if pid in id2candidates_path]
        self.nsamples = len(self.patient_paths)
        self.data_path = data_path

    def generate(self):
        """Yield ``(pid, pixel_spacing)`` for every kept patient."""
        for scan_path in self.patient_paths:
            patient_id = utils_lung.extract_pid_dir(scan_path)
            _, spacing = utils_lung.read_dicom_scan(scan_path)
            yield patient_id, spacing
class DSBPatientsDataGenerator_only_heatmap(object):
    """Batch generator yielding one candidate heatmap per patient."""

    def __init__(self, data_path, batch_size, transform_params, id2candidates_path, data_prep_fun,
                 n_candidates_per_patient, rng, random, infinite, candidates_prep_fun, return_patch_locs=False, shuffle_top_n=False, patient_ids=None):
        """Read the label table and keep the patients with a candidates file.

        Raises ``ValueError`` when ``patient_ids`` is ``None``.
        ``return_patch_locs`` is accepted but not stored or used.
        """
        self.id2label = utils_lung.read_labels(pathfinder.LABELS_PATH)
        self.id2candidates_path = id2candidates_path

        if patient_ids is None:
            raise ValueError('provide patient ids')
        # TODO: the membership filter should be redundant if fpr and
        # segmentation are correctly generated
        self.patient_paths = [data_path + '/' + pid
                              for pid in patient_ids
                              if pid in id2candidates_path]
        self.nsamples = len(self.patient_paths)

        self.data_path = data_path
        self.batch_size = batch_size
        self.transform_params = transform_params
        self.data_prep_fun = data_prep_fun
        self.candidates_prep_fun = candidates_prep_fun
        self.n_candidates_per_patient = n_candidates_per_patient
        self.rng = rng
        self.random = random
        self.infinite = infinite
        self.shuffle_top_n = shuffle_top_n

    def generate(self):
        """Yield ``(x_batch, y_batch, pids_batch)`` for full batches only.

        ``x_batch`` has shape ``(batch_size,) + transform_params['heatmap_size']``.
        A trailing batch with fewer than ``batch_size`` patients is processed
        but not yielded.
        """
        keep_going = True
        while keep_going:
            order = np.arange(self.nsamples)
            if self.random:
                self.rng.shuffle(order)

            for start in range(0, len(order), self.batch_size):
                chosen = order[start:start + self.batch_size]

                x_batch = np.zeros((self.batch_size,)
                                   + self.transform_params['heatmap_size'], dtype='float32')
                y_batch = np.zeros((self.batch_size,), dtype='float32')
                pids_batch = []

                for row, sample_idx in enumerate(chosen):
                    scan_path = self.patient_paths[sample_idx]
                    patient_id = utils_lung.extract_pid_dir(scan_path)
                    scan, spacing = utils_lung.read_dicom_scan(scan_path)

                    candidates = utils.load_pkl(self.id2candidates_path[patient_id])
                    weighted = self.candidates_prep_fun(candidates)
                    heatmap = self.data_prep_fun(data=scan,
                                                 candidates=weighted,
                                                 pixel_spacing=spacing)
                    x_batch[row] = np.float32(heatmap)
                    y_batch[row] = self.id2label.get(patient_id)
                    pids_batch.append(patient_id)

                if len(chosen) == self.batch_size:
                    yield x_batch, y_batch, pids_batch

            keep_going = bool(self.infinite)
class DSBPatientsDataGeneratorRandomSelectionNonCancerous(object):
    """Batch generator that samples candidates at random for label-0 patients.

    Patients with a truthy label contribute their first
    ``n_candidates_per_patient`` candidates; all others contribute a random
    selection among their first ``top_false`` candidates.
    """

    def __init__(self, data_path, batch_size, transform_params, id2candidates_path, data_prep_fun,
                 n_candidates_per_patient, rng, random, infinite, top_true=10, top_false=16, shuffle_top_n=False, patient_ids=None):
        """Read the label table and keep the patients with a candidates file.

        Raises ``ValueError`` when ``patient_ids`` is ``None``.  ``top_true``
        is stored but not read by ``generate``.
        """
        self.id2label = utils_lung.read_labels(pathfinder.LABELS_PATH)
        self.id2candidates_path = id2candidates_path

        if patient_ids is None:
            raise ValueError('provide patient ids')
        # TODO: the membership filter should be redundant if fpr and
        # segmentation are correctly generated
        self.patient_paths = [data_path + '/' + pid
                              for pid in patient_ids
                              if pid in id2candidates_path]
        self.nsamples = len(self.patient_paths)

        self.data_path = data_path
        self.batch_size = batch_size
        self.transform_params = transform_params
        self.data_prep_fun = data_prep_fun
        self.n_candidates_per_patient = n_candidates_per_patient
        self.rng = rng
        self.random = random
        self.infinite = infinite
        self.shuffle_top_n = shuffle_top_n
        self.top_true = top_true
        self.top_false = top_false

    def generate(self):
        """Yield ``(x_batch, y_batch, pids_batch)`` for full batches only.

        ``x_batch`` has shape ``(batch_size, n_candidates_per_patient, 1) +
        transform_params['patch_size']``.
        """
        keep_going = True
        while keep_going:
            order = np.arange(self.nsamples)
            if self.random:
                self.rng.shuffle(order)

            for start in range(0, len(order), self.batch_size):
                chosen = order[start:start + self.batch_size]

                x_batch = np.zeros((self.batch_size, self.n_candidates_per_patient, 1,)
                                   + self.transform_params['patch_size'], dtype='float32')
                y_batch = np.zeros((self.batch_size,), dtype='float32')
                pids_batch = []

                for row, sample_idx in enumerate(chosen):
                    scan_path = self.patient_paths[sample_idx]
                    patient_id = utils_lung.extract_pid_dir(scan_path)
                    scan, spacing = utils_lung.read_dicom_scan(scan_path)

                    candidates = utils.load_pkl(self.id2candidates_path[patient_id])
                    label = self.id2label.get(patient_id)

                    if not label:
                        # NOTE(review): indexing fails if the patient has
                        # fewer than top_false candidates -- confirm upstream
                        # guarantees enough of them.
                        picks = np.arange(self.top_false)
                        self.rng.shuffle(picks)
                        top = candidates[picks[:self.n_candidates_per_patient]]
                    else:
                        top = candidates[:self.n_candidates_per_patient]

                    if self.shuffle_top_n:
                        self.rng.shuffle(top)

                    patches = self.data_prep_fun(data=scan,
                                                 patch_centers=top,
                                                 pixel_spacing=spacing)
                    # Insert the singleton axis after the candidate axis.
                    x_batch[row] = np.float32(patches)[:, None, :, :, :]
                    y_batch[row] = label
                    pids_batch.append(patient_id)

                if len(chosen) == self.batch_size:
                    yield x_batch, y_batch, pids_batch

            keep_going = bool(self.infinite)
#balance between patients with and without cancer
class BalancedDSBPatientsDataGenerator(object):
    """Batch generator drawing label-1 and label-0 patients with equal odds.

    Each draw flips a fair coin: heads picks a random patient with a truthy
    label (with replacement), tails takes the next patient from the shuffled
    list of the others.  An epoch ends once all of the latter were used.
    """

    def __init__(self, data_path, batch_size, transform_params, id2candidates_path, data_prep_fun,
                 n_candidates_per_patient, rng, random, infinite, shuffle_top_n=False, patient_ids=None):
        """Read the label table and split the usable patients by label.

        Only ids present in ``id2candidates_path`` are sampled from, because
        ``prepare_batch`` looks every sampled id up in that mapping.

        Raises:
            ValueError: if ``patient_ids`` is ``None``.
            KeyError: if a usable patient id has no label.
        """
        self.id2label = utils_lung.read_labels(pathfinder.LABELS_PATH)
        self.id2candidates_path = id2candidates_path

        if patient_ids is None:
            raise ValueError('provide patient ids')

        self.patient_paths = []
        self.pos_ids = []
        self.neg_ids = []
        for pid in patient_ids:
            # TODO: this check should be redundant if fpr and segmentation
            # are correctly generated
            if pid not in self.id2candidates_path:
                continue
            self.patient_paths.append(data_path + '/' + pid)
            if self.id2label[pid]:
                self.pos_ids.append(pid)
            else:
                self.neg_ids.append(pid)

        self.n_pos_ids = len(self.pos_ids)
        self.n_neg_ids = len(self.neg_ids)
        # Parenthesised %-formatting prints the same text under the Python 2
        # print statement and the Python 3 print function.
        print('n positive ids %s' % self.n_pos_ids)
        print('n negative ids %s' % self.n_neg_ids)

        # all_pids / nsamples still describe the ids as passed in.
        self.all_pids = patient_ids
        self.nsamples = len(self.all_pids)

        self.data_path = data_path
        self.data_prep_fun = data_prep_fun
        self.batch_size = batch_size
        self.transform_params = transform_params
        self.n_candidates_per_patient = n_candidates_per_patient
        self.rng = rng
        self.random = random
        self.infinite = infinite
        self.shuffle_top_n = shuffle_top_n

    def generate(self):
        """Yield the result of ``prepare_batch`` for each assembled batch.

        The last batch of an epoch may hold fewer than ``batch_size``
        patients.
        """
        while True:
            neg_rand_idxs = np.arange(self.n_neg_ids)
            if self.random:
                self.rng.shuffle(neg_rand_idxs)

            neg_ptr = 0
            batch_pids = []
            while neg_ptr < self.n_neg_ids:
                if self.rng.randint(2):
                    # take a cancerous patient
                    batch_pids.append(self.rng.choice(self.pos_ids))
                else:
                    batch_pids.append(self.neg_ids[neg_rand_idxs[neg_ptr]])
                    neg_ptr += 1

                if len(batch_pids) == self.batch_size:
                    yield self.prepare_batch(batch_pids)
                    batch_pids = []

            # yield the half filled batch
            if batch_pids:
                yield self.prepare_batch(batch_pids)

            if not self.infinite:
                break

    def prepare_batch(self, batch_pids):
        """Load scans and candidates for ``batch_pids``.

        Returns:
            ``(x_batch, y_batch, batch_pids)`` where ``x_batch`` has shape
            ``(len(batch_pids), n_candidates_per_patient, 1) + patch_size``.
        """
        n_batch = len(batch_pids)
        x_batch = np.zeros((n_batch, self.n_candidates_per_patient, 1,)
                           + self.transform_params['patch_size'], dtype='float32')
        y_batch = np.zeros((n_batch,), dtype='float32')

        for i, pid in enumerate(batch_pids):
            # str(): ids picked through rng.choice are not plain strings.
            patient_path = self.data_path + '/' + str(pid)
            img, pixel_spacing = utils_lung.read_dicom_scan(patient_path)

            all_candidates = utils.load_pkl(self.id2candidates_path[pid])
            top_candidates = all_candidates[:self.n_candidates_per_patient]
            if self.shuffle_top_n:
                self.rng.shuffle(top_candidates)

            x_batch[i] = np.float32(self.data_prep_fun(data=img,
                                                       patch_centers=top_candidates,
                                                       pixel_spacing=pixel_spacing))[:, None, :, :, :]
            y_batch[i] = self.id2label.get(pid)

        return x_batch, y_batch, batch_pids
class DSBDataGenerator(object):
    """Single-pass iterator yielding whole patient scans as float32 arrays."""

    def __init__(self, data_path, transform_params=None, data_prep_fun=None, patient_pids=None, **kwargs):
        """Choose the patient paths to iterate over.

        Args:
            data_path: directory containing one scan per patient id.
            transform_params: stored on the instance, not used by this class.
            data_prep_fun: optional callable invoked with ``data`` and
                ``pixel_spacing`` keyword arguments, returning
                ``(x, tf_matrix)``; without it the raw scan is yielded.
            patient_pids: ids to iterate over.  When ``None`` every path
                listed under ``data_path`` is used (previously this raised
                ``TypeError``).
            **kwargs: accepted and ignored.
        """
        if patient_pids is None:
            self.patient_paths = utils_lung.get_patient_data_paths(data_path)
        else:
            # The directory listing is not needed when ids are given.
            self.patient_paths = [data_path + '/' + pid for pid in patient_pids]

        self.nsamples = len(self.patient_paths)
        self.data_path = data_path
        self.data_prep_fun = data_prep_fun
        self.transform_params = transform_params

    def generate(self):
        """Yield ``(x, pid)`` for every patient, ``x`` cast to float32."""
        for patient_path in self.patient_paths:
            pid = utils_lung.extract_pid_dir(patient_path)
            img, pixel_spacing = utils_lung.read_dicom_scan(patient_path)

            if self.data_prep_fun:
                # The transform matrix returned alongside x is not used here.
                x, _ = self.data_prep_fun(data=img, pixel_spacing=pixel_spacing)
            else:
                x = img

            yield np.float32(x), pid
class CandidatesPropertiesLunaDataGenerator(object):
def __init__(self, data_path, batch_size, transform_params, label_prep_fun,
nproperties, patient_ids, data_prep_fun, rng,
full_batch, random, infinite, positive_proportion, properties_included=[],
random_negative_samples=False, **kwargs):
id2positive_annotations = utils_lung.read_luna_properties(pathfinder.LUNA_PROPERTIES_PATH)
id2negative_annotations = utils_lung.read_luna_negative_candidates(pathfinder.LUNA_CANDIDATES_PATH)
self.file_extension = '.pkl' if 'pkl' in data_path else '.mhd'
self.id2positive_annotations = {}
self.id2negative_annotations = {}
self.pid2patient_path = {}
n_positive = 0
for pid in patient_ids:
self.pid2patient_path[pid] = data_path + '/' + pid + self.file_extension
if pid in id2positive_annotations:
self.id2positive_annotations[pid] = id2positive_annotations[pid]
n_positive += len(id2positive_annotations[pid])
if pid in id2negative_annotations:
self.id2negative_annotations[pid] = id2negative_annotations[pid]
self.nsamples = int(n_positive + (1. - positive_proportion) / positive_proportion * n_positive)
print 'n samples', self.nsamples
self.idx2pid_annotation = {}
i = 0
for pid, annotations in self.id2positive_annotations.iteritems():
for a in annotations:
self.idx2pid_annotation[i] = (pid, a)
i += 1
print 'n positive', len(self.idx2pid_annotation.keys())
if random_negative_samples:
while i < self.nsamples:
self.idx2pid_annotation[i] = (None, None)
i += 1
else:
while i < self.nsamples:
pid = rng.choice(self.id2negative_annotations.keys())
patient_annotations = self.id2negative_annotations[pid]
a = patient_annotations[rng.randint(len(patient_annotations))]
self.idx2pid_annotation[i] = (pid, a)
i += 1
assert len(self.idx2pid_annotation) == self.nsamples
self.data_path = data_path
self.batch_size = batch_size
self.rng = rng
self.full_batch = full_batch
self.random = random
self.infinite = infinite
self.data_prep_fun = data_prep_fun
self.transform_params = transform_params
self.positive_proportion = positive_proportion
self.label_prep_fun = label_prep_fun
self.nlabels = nproperties
if len(properties_included)>0:
self.nlabels=len(properties_included)
self.properties_included = properties_included
assert self.transform_params['pixel_spacing'] == (1., 1., 1.)
def generate(self):
    """Yield mini-batches of candidate patches with their property labels.

    Every epoch walks the indices ``0 .. self.nsamples - 1`` in steps of
    ``self.batch_size``; the order is shuffled with ``self.rng`` when
    ``self.random`` is set. Entries of ``self.idx2pid_annotation`` stored as
    ``(None, None)`` are resolved on the fly to a randomly drawn negative
    candidate of a randomly drawn patient.

    When ``self.full_batch`` is set, an incomplete trailing batch is skipped
    before any scan is read from disk; such a batch therefore draws nothing
    from ``self.rng``.

    Yields:
        tuple: ``(x_batch, y_batch, patients_ids)`` where ``x_batch`` is a
        float32 array of shape ``(nb,) + transform_params['patch_size']``,
        ``y_batch`` is a float32 array of shape ``(nb, self.nlabels)`` and
        ``patients_ids`` holds the patient id of every row.

    The generator stops after one epoch unless ``self.infinite`` is set.
    """
    # The file format is fixed per generator, so choose the reader once.
    read_scan = utils_lung.read_pkl if self.file_extension == '.pkl' else utils_lung.read_mhd

    while True:
        rand_idxs = np.arange(self.nsamples)
        if self.random:
            self.rng.shuffle(rand_idxs)

        # Patients that negatives can be drawn from; built once per epoch
        # instead of once per sample.
        negative_pids = list(self.id2negative_annotations)

        for pos in range(0, len(rand_idxs), self.batch_size):
            idxs_batch = rand_idxs[pos:pos + self.batch_size]
            nb = len(idxs_batch)

            if self.full_batch and nb != self.batch_size:
                # This batch would be dropped anyway: do not pay for loading it.
                continue

            # allocate batches
            x_batch = np.zeros((nb,) + self.transform_params['patch_size'], dtype='float32')
            y_batch = np.zeros((nb, self.nlabels), dtype='float32')
            patients_ids = []

            for i, idx in enumerate(idxs_batch):
                pid, patch_annotation = self.idx2pid_annotation[idx]
                if pid is None:
                    # Placeholder entry: draw a fresh negative candidate.
                    pid = self.rng.choice(negative_pids)
                    patient_annotations = self.id2negative_annotations[pid]
                    patch_annotation = patient_annotations[self.rng.randint(len(patient_annotations))]

                patients_ids.append(pid)
                y_batch[i] = self.label_prep_fun(patch_annotation, self.properties_included)

                img, origin, pixel_spacing = read_scan(self.pid2patient_path[pid])
                # Only the first four annotation fields locate the patch.
                patch_zyxd = patch_annotation[:4]
                x_batch[i, :, :, :] = self.data_prep_fun(data=img, pid=pid,
                                                         patch_center=patch_zyxd,
                                                         pixel_spacing=pixel_spacing,
                                                         luna_origin=origin)

            yield x_batch, y_batch, patients_ids

        if not self.infinite:
            break
class CandidatesPropertiesLunaDataGenerator2(object):
    """Batch generator over LUNA candidates labelled with nodule properties.

    Positive samples are read from ``pathfinder.LUNA_PROPERTIES_PATH`` and
    negative candidates from ``pathfinder.LUNA_CANDIDATES_PATH``; both are
    restricted to ``patient_ids``. The sample table ``idx2pid_annotation``
    holds every positive followed by enough negatives for the positives to
    make up ``positive_proportion`` of ``nsamples``.
    """

    def __init__(self, data_path, batch_size, transform_params, label_prep_fun,
                 nproperties, patient_ids, data_prep_fun, rng,
                 full_batch, random, infinite, positive_proportion, properties_included=None,
                 random_negative_samples=False, **kwargs):
        """Build the table of samples served by :meth:`generate`.

        Args:
            data_path: directory holding one scan per patient. Scans are read
                as ``.pkl`` when ``'pkl'`` occurs in the path, else as ``.mhd``.
            batch_size: number of samples per yielded batch.
            transform_params: dict; ``'pixel_spacing'`` must equal
                ``(1., 1., 1.)`` and ``'patch_size'`` gives the per-sample
                shape of ``x_batch``.
            label_prep_fun: called as
                ``label_prep_fun(annotation, properties_included)``.
            nproperties: number of labels when ``properties_included`` is
                empty.
            patient_ids: ids of the patients to serve.
            data_prep_fun: called with the keywords ``data``, ``pid``,
                ``patch_center``, ``pixel_spacing`` and ``luna_origin``.
            rng: random state providing ``choice``, ``randint`` and
                ``shuffle``.
            full_batch: if true, only batches of exactly ``batch_size``
                samples are yielded.
            random: if true, the sample order is shuffled every epoch.
            infinite: if true, :meth:`generate` never stops.
            positive_proportion: fraction of ``nsamples`` taken by positives.
            properties_included: properties forwarded to ``label_prep_fun``;
                when non-empty its length replaces ``nproperties``. ``None``
                (the default) stands for an empty list.
            random_negative_samples: if true, negatives are stored as
                ``(None, None)`` placeholders and drawn anew in
                :meth:`generate`; otherwise they are drawn once, here.
            **kwargs: accepted and ignored.
        """
        # Fail before the annotation files are parsed.
        assert transform_params['pixel_spacing'] == (1., 1., 1.)

        # A fresh list per instance, not one default object shared by all.
        if properties_included is None:
            properties_included = []

        id2positive_annotations = utils_lung.read_luna_properties(pathfinder.LUNA_PROPERTIES_PATH)
        id2negative_annotations = utils_lung.read_luna_negative_candidates(pathfinder.LUNA_CANDIDATES_PATH)

        self.file_extension = '.pkl' if 'pkl' in data_path else '.mhd'
        self.id2positive_annotations = {}
        self.id2negative_annotations = {}
        self.pid2patient_path = {}
        n_positive = 0
        for pid in patient_ids:
            self.pid2patient_path[pid] = data_path + '/' + pid + self.file_extension
            if pid in id2positive_annotations:
                self.id2positive_annotations[pid] = id2positive_annotations[pid]
                n_positive += len(id2positive_annotations[pid])
            if pid in id2negative_annotations:
                self.id2negative_annotations[pid] = id2negative_annotations[pid]

        # positives + negatives, with negatives = (1 - p) / p * positives
        self.nsamples = int(n_positive + (1. - positive_proportion) / positive_proportion * n_positive)
        print('n samples %s' % (self.nsamples,))

        # Positives occupy the indices 0 .. n_positive - 1.
        positive_samples = [(pid, a)
                            for pid, annotations in self.id2positive_annotations.items()
                            for a in annotations]
        self.idx2pid_annotation = dict(enumerate(positive_samples))
        print('n positive %s' % (len(self.idx2pid_annotation),))

        if random_negative_samples:
            # Placeholders, resolved to a random negative at generation time.
            for i in range(n_positive, self.nsamples):
                self.idx2pid_annotation[i] = (None, None)
        else:
            # Key list built once, not once per drawn negative.
            negative_pids = list(self.id2negative_annotations)
            for i in range(n_positive, self.nsamples):
                pid = rng.choice(negative_pids)
                patient_annotations = self.id2negative_annotations[pid]
                a = patient_annotations[rng.randint(len(patient_annotations))]
                self.idx2pid_annotation[i] = (pid, a)
        assert len(self.idx2pid_annotation) == self.nsamples

        self.data_path = data_path
        self.batch_size = batch_size
        self.rng = rng
        self.full_batch = full_batch
        self.random = random
        self.infinite = infinite
        self.data_prep_fun = data_prep_fun
        self.transform_params = transform_params
        self.positive_proportion = positive_proportion
        self.label_prep_fun = label_prep_fun
        self.properties_included = properties_included
        # An explicit property selection overrides the default label count.
        self.nlabels = len(properties_included) if len(properties_included) > 0 else nproperties

    def generate(self):
        """Yield ``(x_batch, y_batch, patients_ids)`` mini-batches.

        ``x_batch`` is a float32 array of shape
        ``(nb,) + transform_params['patch_size']``. ``y_batch`` is a float32
        array of shape ``(nb,)`` when ``self.nlabels == 1`` and
        ``(nb, self.nlabels)`` otherwise. ``patients_ids`` holds the patient
        id of every row.

        When ``self.full_batch`` is set, an incomplete trailing batch is
        skipped before any scan is read from disk; such a batch therefore
        draws nothing from ``self.rng``. The generator stops after one epoch
        unless ``self.infinite`` is set.
        """
        # The file format is fixed per generator, so choose the reader once.
        read_scan = utils_lung.read_pkl if self.file_extension == '.pkl' else utils_lung.read_mhd

        while True:
            rand_idxs = np.arange(self.nsamples)
            if self.random:
                self.rng.shuffle(rand_idxs)

            # Patients that negatives can be drawn from; built once per epoch
            # instead of once per sample.
            negative_pids = list(self.id2negative_annotations)

            for pos in range(0, len(rand_idxs), self.batch_size):
                idxs_batch = rand_idxs[pos:pos + self.batch_size]
                nb = len(idxs_batch)

                if self.full_batch and nb != self.batch_size:
                    # This batch would be dropped anyway: do not load it.
                    continue

                # allocate batches
                x_batch = np.zeros((nb,) + self.transform_params['patch_size'], dtype='float32')
                if self.nlabels == 1:
                    y_batch = np.zeros((nb,), dtype='float32')
                else:
                    y_batch = np.zeros((nb, self.nlabels), dtype='float32')
                patients_ids = []

                for i, idx in enumerate(idxs_batch):
                    pid, patch_annotation = self.idx2pid_annotation[idx]
                    if pid is None:
                        # Placeholder entry: draw a fresh negative candidate.
                        pid = self.rng.choice(negative_pids)
                        patient_annotations = self.id2negative_annotations[pid]
                        patch_annotation = patient_annotations[self.rng.randint(len(patient_annotations))]

                    patients_ids.append(pid)
                    y_batch[i] = self.label_prep_fun(patch_annotation, self.properties_included)

                    img, origin, pixel_spacing = read_scan(self.pid2patient_path[pid])
                    # Only the first four annotation fields locate the patch.
                    patch_zyxd = patch_annotation[:4]
                    x_batch[i, :, :, :] = self.data_prep_fun(data=img, pid=pid,
                                                             patch_center=patch_zyxd,
                                                             pixel_spacing=pixel_spacing,
                                                             luna_origin=origin)

                yield x_batch, y_batch, patients_ids

            if not self.infinite:
                break
class CandidatesLunaValidDataGenerator2(object):
    """Deterministic validation generator over LUNA candidates.

    For every patient in ``patient_ids`` that has positive annotations, all
    positives are kept and an equally sized set of negative candidates is
    drawn with a fixed-seed random state, so the served samples are the same
    on every run. Samples are yielded one at a time.
    """

    def __init__(self, data_path, transform_params, patient_ids, data_prep_fun,
                 label_prep_fun=None, properties_included=None, **kwargs):
        """Select the positive and negative patches to validate on.

        Args:
            data_path: directory holding one scan per patient. Scans are read
                as ``.pkl`` when ``'pkl'`` occurs in the path, else as ``.mhd``.
            transform_params: dict; when ``label_prep_fun`` is given,
                ``'pixel_spacing'`` must equal ``(1., 1., 1.)``.
            patient_ids: ids of the patients to consider; those without
                positive annotations are left out.
            data_prep_fun: called with the keywords ``data``, ``pid``,
                ``patch_center``, ``pixel_spacing`` and ``luna_origin``.
            label_prep_fun: optional; called as
                ``label_prep_fun(annotation, properties_included)`` to label
                positives. Without it positives are labelled ``1.``.
            properties_included: properties forwarded to ``label_prep_fun``.
                ``None`` (the default) stands for an empty list.
            **kwargs: accepted and ignored.
        """
        # A fresh list per instance, not one default object shared by all.
        if properties_included is None:
            properties_included = []

        rng = np.random.RandomState(42)  # do not change this!!!

        id2positive_annotations = utils_lung.read_luna_properties(pathfinder.LUNA_PROPERTIES_PATH)
        id2negative_annotations = utils_lung.read_luna_negative_candidates(pathfinder.LUNA_CANDIDATES_PATH)

        self.file_extension = '.pkl' if 'pkl' in data_path else '.mhd'
        self.id2positive_annotations = {}
        self.id2negative_annotations = {}
        self.id2patient_path = {}
        n_positive, n_negative = 0, 0
        # The seeded draws below must stay in this order and form, otherwise
        # a different set of negatives gets selected.
        for pid in patient_ids:
            if pid not in id2positive_annotations:
                continue
            self.id2positive_annotations[pid] = id2positive_annotations[pid]
            negative_annotations = id2negative_annotations[pid]
            n_pos = len(id2positive_annotations[pid])
            n_neg = len(negative_annotations)
            # As many distinct negatives as this patient has positives.
            neg_idxs = rng.choice(n_neg, size=n_pos, replace=False)
            negative_annotations_selected = [negative_annotations[i] for i in neg_idxs]
            self.id2negative_annotations[pid] = negative_annotations_selected
            self.id2patient_path[pid] = data_path + '/' + pid + self.file_extension
            n_positive += n_pos
            n_negative += len(negative_annotations_selected)

        print('n positive %s' % (n_positive,))
        print('n negative %s' % (n_negative,))

        # Counts the selected patients, not the patches yielded by generate().
        self.nsamples = len(self.id2patient_path)
        self.data_path = data_path
        self.rng = rng
        self.data_prep_fun = data_prep_fun
        self.transform_params = transform_params
        self.label_prep_fun = label_prep_fun
        if label_prep_fun is not None:
            assert self.transform_params['pixel_spacing'] == (1., 1., 1.)
        self.properties_included = properties_included

    def generate(self):
        """Yield ``(x_batch, y_batch, [pid])`` for one patch at a time.

        For each selected patient all positive patches come first, followed
        by the selected negative patches. ``x_batch`` is the float32 patch
        with a leading axis of length one. ``y_batch`` is ``[1.]`` for a
        positive (or the output of ``label_prep_fun`` wrapped in a
        one-element array) and ``[0.]`` for a negative.
        """
        # The file format is fixed per generator, so choose the reader once.
        read_scan = utils_lung.read_pkl if self.file_extension == '.pkl' else utils_lung.read_mhd

        for pid in self.id2positive_annotations:
            positives = self.id2positive_annotations[pid]
            negatives = self.id2negative_annotations[pid]
            if not positives and not negatives:
                # Nothing to yield for this patient: leave the scan on disk.
                continue

            # One read per patient instead of one per patch.
            # NOTE(review): assumes data_prep_fun does not modify `data` in
            # place - confirm against the configured function.
            img, origin, pixel_spacing = read_scan(self.id2patient_path[pid])

            for patch_center in positives:
                if self.label_prep_fun is None:
                    y_batch = np.array([1.], dtype='float32')
                else:
                    y_batch = np.array([self.label_prep_fun(patch_center, self.properties_included)],
                                       dtype='float32')
                # Only the first four annotation fields locate the patch.
                x_batch = np.float32(self.data_prep_fun(data=img, pid=pid,
                                                        patch_center=patch_center[0:4],
                                                        pixel_spacing=pixel_spacing,
                                                        luna_origin=origin))[None, :, :, :]
                yield x_batch, y_batch, [pid]

            for patch_center in negatives:
                y_batch = np.array([0.], dtype='float32')
                x_batch = np.float32(self.data_prep_fun(data=img, pid=pid,
                                                        patch_center=patch_center,
                                                        pixel_spacing=pixel_spacing,
                                                        luna_origin=origin))[None, :, :, :]
                yield x_batch, y_batch, [pid]