[853718]: / bm_dataset / split_images_two_tissues.py

Download this file

133 lines (105 with data), 4.2 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
"""
Splitting image containing two samples
.. note:: Using these scripts for 1+GB images takes several tens of GB of RAM
Sample usage::

    python split_images_two_tissues.py \
        -i "/datagrid/Medical/dataset_ANHIR/images/COAD_*/scale-100pc/*_*.png" \
        --nb_workers 3

Copyright (C) 2016-2019 Jiri Borovec <jiri.borovec@fel.cvut.cz>
"""
import argparse
import gc
import glob
import logging
import os
import sys
import time
from functools import partial
import cv2 as cv
sys.path += [os.path.abspath('.'), os.path.abspath('..')] # Add path to root
from birl.utilities.dataset import (
args_expand_parse_images,
find_split_objects,
load_large_image,
project_object_edge,
save_large_image,
)
from birl.utilities.experiments import get_nb_workers, iterate_mproc_map
#: number of workers handed to `args_expand_parse_images` as its second argument;
#: the value comes from `get_nb_workers(0.5)`
#: (presumably a fraction of the available CPU cores - TODO confirm)
NB_WORKERS = get_nb_workers(0.5)
#: use following image size for estimating cutting line
SCALE_SIZE = 512
#: cut image in one dimension/axis
CUT_DIMENSION = 0
def arg_parse_params():
    """ parse the input parameters

    The `--dimension` option (0 or 1, default `CUT_DIMENSION`) is registered
    here; the parser is then handed to `args_expand_parse_images` together
    with `NB_WORKERS` and the result of that call is logged and returned.

    :return dict: parameters
    """
    # SEE: https://docs.python.org/3/library/argparse.html
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--dimension', type=int, required=False, choices=[0, 1], help='cutting dimension', default=CUT_DIMENSION
    )
    args = args_expand_parse_images(parser, NB_WORKERS)
    # pass the value as a logging argument instead of pre-formatting with `%`:
    # formatting is deferred to the logging framework and cannot fail
    # on a tuple-like value
    logging.info('ARGUMENTS: \n%r', args)
    return args
def split_image(img_path, overwrite=False, cut_dim=CUT_DIMENSION):
    """ split an image containing several samples along a single dimension

    The image file name (without extension) is split by "_" and each part is
    used as the name of one output image, which is saved in the same folder
    and with the same extension as the input image.

    :param str img_path: path to the input image
    :param bool overwrite: allow overwrite exiting output images
    :param int cut_dim: define splitting dimension, 0 or 1
    :raise ValueError: if `cut_dim` is not a supported dimension
    """
    name, ext = os.path.splitext(os.path.basename(img_path))
    folder = os.path.dirname(img_path)
    obj_names = name.split('_')
    paths_img = [os.path.join(folder, obj_name + ext) for obj_name in obj_names]

    if all(os.path.isfile(p) for p in paths_img) and not overwrite:
        logging.debug('existing all splits of %r', paths_img)
        return

    # fail fast: reject a wrong dimension before the large image is loaded
    if cut_dim not in (0, 1):
        raise ValueError('unsupported dimension: %i' % cut_dim)

    img = load_large_image(img_path)
    # work with just a scaled version
    scale_factor = max(1, img.shape[cut_dim] / float(SCALE_SIZE))
    sc = 1. / scale_factor
    # area interpolation for down-scaling, linear when the size is kept
    order = cv.INTER_AREA if scale_factor > 1 else cv.INTER_LINEAR
    img_small = 255 - cv.resize(img, None, fx=sc, fy=sc, interpolation=order)
    img_edge = project_object_edge(img_small, cut_dim)
    del img_small

    # prepare all cut edges and scale them to original image size
    splits = find_split_objects(img_edge, nb_objects=len(obj_names))
    if not splits:
        logging.error('no splits found for %s', img_path)
        return
    if len(splits) < len(obj_names) - 1:
        # too few cutting lines would end in an IndexError while cutting,
        # possibly after some of the outputs have been already written
        logging.error('found only %i splits for %i objects in %s', len(splits), len(obj_names), img_path)
        return
    edges = [int(round(i * scale_factor)) for i in [0] + splits + [len(img_edge)]]

    # cutting images
    for i, path_img_cut in enumerate(paths_img):
        if os.path.isfile(path_img_cut) and not overwrite:
            logging.debug('existing "%s"', path_img_cut)
            continue
        begin, end = edges[i], edges[i + 1]
        # `cut_dim` was validated above, so it is either 0 or 1 here
        img_cut = img[begin:end, ...] if cut_dim == 0 else img[:, begin:end, ...]
        save_large_image(path_img_cut, img_cut)
        gc.collect()
        time.sleep(1)
def main(path_images, dimension, overwrite, nb_workers):
    """ collect images matching the pattern and split each of them

    :param str path_images: glob pattern of the input images
    :param int dimension: cutting dimension passed to `split_image`
    :param bool overwrite: whether overwrite existing image on output
    :param int nb_workers: nb jobs running in parallel
    """
    matched_paths = glob.glob(path_images)
    if len(matched_paths) == 0:
        logging.info('No images found on "%s"', path_images)
        return
    matched_paths.sort()

    cut_single = partial(split_image, overwrite=overwrite, cut_dim=dimension)
    jobs = iterate_mproc_map(cut_single, matched_paths, desc='Cut image tissues', nb_workers=nb_workers)
    # drain the returned iterable, the particular results are not needed
    for _ in jobs:
        pass
if __name__ == '__main__':
    # script entry: configure logging, parse the CLI and run the splitting
    logging.basicConfig(level=logging.INFO)
    cli_params = arg_parse_params()
    logging.info('running...')
    # the parsed parameters are expanded as keyword arguments of `main`
    main(**cli_params)
    logging.info('DONE')