[797475]: / dicom_and_image_tools / segment / masterseg.py

Download this file

458 lines (366 with data), 16.6 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
import sys
import argparse
import os
import datetime
import json
import logging
import SimpleITK as sitk
import numpy as np
import sitkstrats
import bounding
# a flag to run the script in debug mode. ONLY SET in process_command_line.
global DEBUG # pylint: disable=W0604
DEBUG = False
def process_command_line(argv):
'''Parse the command line and do a first-pass on processing them into a
format appropriate for the rest of the script.'''
parser = argparse.ArgumentParser(formatter_class=argparse.
ArgumentDefaultsHelpFormatter)
parser.add_argument(
"image",
help="The image to process on.")
parser.add_argument(
"--nseeds", type=int, default=10,
help="The number of randomly placed seeds to produce.")
parser.add_argument(
'--media_root', default="media_root/",
help="The directory to store temporary and intermediate media output")
parser.add_argument(
'--profile', default=False, action='store_true',
help="Run cProfile on script execution.")
parser.add_argument(
'--log', default="logs/",
help="The directory to place logs in.")
parser.add_argument(
'--seed', default=None, nargs=3, type=int, metavar=('X', 'Y', 'Z'),
help="Add an additional, manually determined seed to the " +
"calculation. Seed should be image-indexed (x, y, z not z, y, x).")
parser.add_argument(
'--debug', default=False, action="store_true",
help="Set the script to run in debug mode, where it produces FAR " +
"fewer intermediate files.")
args = parser.parse_args(argv[1:])
args.media_root = os.path.abspath(args.media_root)
args.image = os.path.abspath(args.image)
args.log = os.path.abspath(args.log)
global DEBUG #pylint: disable=W0603
DEBUG = args.debug
return args
def set_label(fname, label, labsep='-'):
'''Set the label (a string addition of labsep + label) for this filename.
'''
ext = fname[fname.rfind('.'):]
fname = fname[:fname.rfind('.')]+labsep+label+ext
return fname
def opthash(options):
'''Produce a short hash of the input options.'''
import hashlib
sha = hashlib.sha1()
sha.update(str(options))
return sha.hexdigest()[0:8]
def debug_log(func, arg, *args, **kwargs):
'''
Wrapper for mediadir_log that writes to disk only if DEBUG is set to true.
'''
if DEBUG:
return mediadir_log(func, arg, *args, **kwargs)
else:
return func(*arg)
def mediadir_log(func, (in_img, in_opts), mediadir, sha, subdir=None):
'''
Invoke some image processing step in the pipeline and write the resulting
file to the directory appropriate to the algorithm/step
that generated it using its sha and the function. Also decorates the info
object with information about the file's location.
'''
optha = opthash(in_opts)
label = func.__name__
(img, opts) = func(in_img, in_opts)
if subdir is None:
subdir = label
out_fname = os.path.join(mediadir, subdir, sha+"-"+optha+'.nii')
sitkstrats.write(img, out_fname)
opts['file'] = out_fname
return (img, opts)
def configure_strats():
'''
Construct a dictionary that represents the configuration of all
segmentation strategies to be used in the script using command line
arguments. CURRENTLY ACCEPTS NO INPUT.
'''
strats = {
'confidence_connected': {
'seed-independent': {
'strategy': sitkstrats.curvature_flow,
'opts': {'curvature_flow': {'timestep': 0.01,
'iterations': 25}}
},
'seed-dependent': {
'strategy': sitkstrats.confidence_connected,
'opts': {'conf_connect': {'iterations': 2,
'multiplier': 1.5,
'neighborhood': 1},
'dialate': {'radius': 1}}
},
},
'geodesic': {
'seed-independent': {
'strategy': sitkstrats.aniso_gauss_sigmo,
'opts': {"anisodiff": {'timestep': 0.01,
'conductance': 9.0,
'iterations': 50},
"gauss": {'sigma': 1.5},
"sigmoid": {'alpha': -20,
'beta': 50}},
},
'seed-dependent': {
'strategy': sitkstrats.fastmarch_seeded_geocontour,
'opts': {"geodesic": {"propagation_scaling": 2.0,
"iterations": 300,
"curvature_scaling": 1.0,
"max_rms_change": 1e-7},
"seed_shift": 3}
}
},
'watershed': {
'seed-independent': {
'strategy': sitkstrats.aniso_gauss_watershed,
'opts': {"anisodiff": {'timestep': 0.01,
'conductance': 9.0,
'iterations': 50},
"gauss": {'sigma': 1.5},
"watershed": {"level": 20}}
},
'seed-dependent': {
'strategy': sitkstrats.isolate_watershed,
'opts': {}
}
}
}
return strats
def seeddep(imgs, seeds, root_dir, sha, segstrats, lung_size, img_in):
# pick an image, basically at random, from imgs to initialize an array
# that tracks which areas of the image have already been segmented out
segmented = np.zeros(sitk.GetArrayFromImage( # pylint: disable=E1101
imgs.values()[0]).shape)
out_info = {}
for seed in seeds:
try:
if segmented[seed[2], seed[1], seed[0]] >= 2:
logging.info(
"Tried to segment %s but it was already segmented", seed)
continue
except IndexError as err:
sys.stderr.write("Tried to access " + str(seed) + " as " +
str(list(reversed(seed))) + " in img of size " +
str(segmented.shape)+"\n")
logging.error(" ".join([str(seed), str(segmented.shape)]))
logging.error(str(err))
raise
# We want to hold onto images and info dicts for each segmentation,
# and we want to automagically store the info we put in seed_info into
# out_info for returning later => use setdefault
out_imgs = {}
seed_info = out_info.setdefault("-".join([str(k) for k in seed]), {})
# for each strategy we want to segment with, get its name and the
# function that executes it.
for (sname, strat) in [(strnam, segstrats[strnam]['seed-dependent'])
for strnam in segstrats]:
img_in = imgs[sname]
opts = dict(strat['opts'])
opts['seed'] = seed
(tmp_img, tmp_info) = debug_log(strat['strategy'],
(img_in, opts),
root_dir,
sha)
out_imgs[sname] = tmp_img
seed_info[sname] = tmp_info
logging.info("Segmented %s with %s", seed, sname)
# we need the names of the input files so that our options hash is
# dependent on the input images.
seed_indep_hashes = [sitkstrats.hash_img(i)[0:8]
for i in out_imgs.values()]
try:
# First, we compute the segmentation union
(consensus, consensus_info) = mediadir_log(
sitkstrats.segmentation_union,
(out_imgs.values(),
{'threshold': 2.0/3.0,
'max_size': lung_size * 0.5,
'min_size': lung_size * 1e-5,
'indep_img_hashes': seed_indep_hashes}),
root_dir,
sha)
# Then we crop down both the initial image ("img_in") AND the
# segmentation based upon the size of the segmentation.
(crop_seg, crop_seg_info) = sitkstrats.crop_to_segmentation(
img=consensus, seg_img=consensus, padding_px=5)
(crop_img, crop_img_info) = sitkstrats.crop_to_segmentation(
img=img_in, seg_img=consensus, padding_px=5)
# Because we used the image "consensus" for both croppings,
# the images should be cropped the same way
assert crop_seg.GetSize() == crop_img.GetSize()
assert crop_seg_info['origin'] == crop_img_info['origin']
assert crop_seg_info['padding'] == crop_img_info['padding']
logging.info("Cropped %s to %s.", seed, crop_img.GetSize())
consensus_info.update(crop_seg_info)
mediadir_log(
lambda x, y: (x, y),
(crop_seg, crop_seg_info),
root_dir,
sha,
subdir="consensus-label")
mediadir_log(
lambda x, y: (x, y),
(crop_img, crop_img_info),
root_dir,
sha,
subdir="consensus-grey")
except RuntimeWarning as war:
logging.info("Failed %s during consensus: %s", seed, war)
seed_info['consensus'] = "failure"
continue
except ValueError as err:
# this ocurrs when the segmentation runs to the edge of the image.
logging.info("Failed %s during cropping: %s", seed, err)
seed_info['consensus'] = "failure"
continue
logging.info("Finished segmenting %s", seed)
segmented += sitk.GetArrayFromImage(consensus)
seed_info['consensus'] = consensus_info
return out_info
def run_img(img, sha, nseeds, root_dir, addl_seed): # pylint: disable=C0111
'''Run the entire protocol on a particular image starting with sha hash'''
img_info = {}
lung_img, lung_info = debug_log(sitkstrats.segment_lung,
(img, {'probe_size': 7}),
root_dir, sha)
img_info['lungseg'] = lung_info
# (img, tmp_info) = debug_log(sitkstrats.crop_to_segmentation,
# (img_in, lung_img),
# root_dir, sha, subdir="crop")
# lung_img = sitkstrats.crop_to_segmentation(lung_img, lung_img)[0]
# img_info['crop'] = tmp_info
segstrats = configure_strats()
seed_indep_imgs = {}
seed_indep_info = {}
seeds = []
for (sname, strat) in [(strnam, segstrats[strnam]['seed-independent'])
for strnam in segstrats]:
try:
optha = opthash(strat['opts'])
fname = os.path.join(root_dir, strat['strategy'].__name__,
sha + "-" + optha + ".nii")
tmp_img = sitkstrats.read(fname)
tmp_info = strat['opts']
tmp_info['file'] = os.path.join(fname)
logging.info(
"Loaded seed-independent image for '%s', '%s' from file",
sname, fname)
except RuntimeError:
logging.debug(
"Building seed-independent image '%s', '%s'.", sname, fname)
(tmp_img, tmp_info) = debug_log(strat['strategy'],
(img, strat['opts']),
root_dir,
sha)
logging.info(
"Built seed-independent image '%s', '%s' in %s",
sname, fname, tmp_info['time'])
seed_indep_imgs[sname] = tmp_img
seed_indep_info[sname] = tmp_info
# compute seeds, first by taking the centers of mass of a bunch of the
# watershed segemented regions, then by adding a bunch of random ones that
# are inside the lung field.
if nseeds > 1 or addl_seed is None:
(seeds, tmp_info) = sitkstrats.com_calc(img=seed_indep_imgs['watershed'],
max_size=0.05, min_size=1e-5,
lung_img=lung_img)
img_info['deterministic-seeds'] = tmp_info
seeds.extend(sitkstrats.distribute_seeds(lung_img, nseeds-len(seeds)))
else:
print("Provided seed only.")
img_info['deterministic-seeds'] = None
if addl_seed is not None:
# additional seed is given in terms of the input image, and must
# be corrected for cropping.
if 'crop' in img_info:
origin = img_info['crop']['origin']
assert len(addl_seed) == len(origin)
addl_seed = [addl_seed[i] - origin[i]
for i in range(len(addl_seed))]
seeds.insert(0, addl_seed)
if len(seeds) > nseeds:
logging.warning("The number of seeds generated in the deterministic " +
"phase (%s) is greater than the allowed number of " +
"seeds (%s). The list of seeds is being truncated.",
len(seeds), nseeds)
# with many deterministic seeds, this list can be longer than nseeds.
seeds = seeds[0:nseeds]
seg_info = seeddep(seed_indep_imgs, seeds,
root_dir, sha, segstrats, img_info['lungseg']['size'],
img)
img_info['noduleseg'] = {}
for seed in seg_info:
for segstrat in seg_info[seed]:
combined_info = {'seed-dependent': seg_info[seed][segstrat]}
if segstrat in seed_indep_info:
combined_info['seed-independent'] = seed_indep_info[segstrat]
img_info['noduleseg'].setdefault(
seed, {})[segstrat] = combined_info
return img_info
class DateTimeEncoder(json.JSONEncoder): # pylint: disable=C0111
def default(self, obj): # pylint: disable=E0202
if isinstance(obj, datetime.datetime):
return str(obj)
elif isinstance(obj, datetime.timedelta):
return str(obj)
return json.JSONEncoder.default(self, obj)
def write_info(info, filename="masterseg-run.json"):
with open(filename, 'w') as f:
try:
json_out = json.dumps(info, sort_keys=True,
indent=2, separators=(',', ': '),
cls=DateTimeEncoder)
except TypeError as err:
logging.error("Error encountered serializing for JSON, dumping " +
"dict here:\n"+str(info))
raise err
f.write(json_out)
def log_name_gen(sha, log_dir):
'''
Build a the absolute path name of the log file for an image, given its sha
and a log file directory.
'''
logfilename = "-".join([sha, str(datetime.datetime.now())])+".log"
logfilename = logfilename.replace(" ", "-")
logfilename = os.path.join(log_dir, logfilename)
return logfilename
def main(argv=None):
'''Run the driver script for this module. This code only runs if we're
being run as a script. Otherwise, it's silent and just exposes methods.'''
args = process_command_line(argv)
basename = os.path.basename(args.image)
sha = basename[:basename.rfind('.')]
# this only gets run the first time around, so if >1 image is
# specified, they will ALL be in the file named by the sha of the first
# one. But grepping around isn't too hard.
logging.basicConfig(filename=log_name_gen(sha, args.log),
level=logging.DEBUG,
format='%(asctime)s %(message)s')
logging.info("Beginning image %s", args.image)
try:
run_info = run_img(sitkstrats.read(args.image), sha,
args.nseeds, args.media_root, args.seed)
except Exception as exc: # pylint: disable=W0703
logging.critical("Encountered critical exception:\n%s", exc)
raise
write_info(run_info, filename=os.path.join(args.log, sha+"-seg.json"))
return 0
if __name__ == "__main__":
if "--profile" in sys.argv:
import cProfile
sys.exit(cProfile.runctx("main(sys.argv)", globals(),
{"sys.argv": sys.argv}))
else:
sys.exit(main(sys.argv))