[853718]: / bm_dataset / crop_tissue_images.py

Download this file

122 lines (95 with data), 3.7 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
"""
Crop images around major object with set padding
.. note:: Using these scripts for 1+GB images takes several tens of GB of RAM
Sample usage::
python crop_tissue_images.py \
-i "/datagrid/Medical/dataset_ANHIR/images/COAD_*/scale-100pc/*.png" \
--padding 0.1 --nb_workers 2
Copyright (C) 2016-2019 Jiri Borovec <jiri.borovec@fel.cvut.cz>
"""
import argparse
import gc
import glob
import logging
import os
import sys
import time
from functools import partial
import cv2 as cv
import numpy as np
sys.path += [os.path.abspath('.'), os.path.abspath('..')] # Add path to root
from birl.utilities.dataset import (
args_expand_parse_images,
find_largest_object,
load_large_image,
project_object_edge,
save_large_image,
)
from birl.utilities.experiments import get_nb_workers, iterate_mproc_map, try_decorator
# worker count handed to `args_expand_parse_images` when the CLI is built
# NOTE(review): 0.5 is presumably a fraction of the available CPUs - confirm in `get_nb_workers`
NB_WORKERS = get_nb_workers(0.5)
# reference size in pixels: `crop_image` shrinks the image so that the mean of
# its height and width is about this value before searching for the tissue
SCALE_SIZE = 512
# NOTE(review): not referenced anywhere in the visible code - confirm whether it is still needed
CUT_DIMENSION = 0
# threshold passed to `find_largest_object` when locating the tissue extent
TISSUE_CONTENT = 0.01
def arg_parse_params():
    """ build the command-line parser and read the script parameters

    Only the `--padding` option is declared here; the parser is then handed
    to `args_expand_parse_images` which returns the parsed parameters.

    :return dict: {str: any}
    """
    # SEE: https://docs.python.org/3/library/argparse.html
    cli_parser = argparse.ArgumentParser()
    padding_option = dict(
        type=float,
        required=False,
        default=0.1,
        help='padding around the object in image percents',
    )
    cli_parser.add_argument('--padding', **padding_option)
    # presumably the helper registers the image-path and worker options
    # before parsing - verify in `birl.utilities.dataset`
    parsed = args_expand_parse_images(cli_parser, NB_WORKERS, overwrite=False)
    logging.info('ARGUMENTS: \n%r' % parsed)
    return parsed
@try_decorator
def crop_image(img_path, crop_dims=(0, 1), padding=0.15):
    """ crop image to be tight around the tissue and save it back to the same path

    The tissue extent is searched on a down-scaled, inverted copy of the image
    and the found range is projected back to the full resolution. Dimensions
    which are not listed in `crop_dims` keep their full extent.

    :param str img_path: path to image, the cropped result overwrites this file
    :param tuple(int) crop_dims: crop in selected dimensions, only 0 and 1 are supported
    :param float padding: padding around tissue, as a fraction of the detected tissue extent
    :raises ValueError: for a dimension other than 0 or 1
        (NOTE(review): `try_decorator` may intercept it - confirm)
    """
    img = load_large_image(img_path)
    # never up-scale: a factor of 1 keeps images already smaller than SCALE_SIZE
    scale_factor = max(1, np.mean(img.shape[:2]) / float(SCALE_SIZE))
    # work with just a scaled version
    sc = 1. / scale_factor
    order = cv.INTER_AREA if scale_factor > 1 else cv.INTER_LINEAR
    # invert intensities; assumes an 8-bit image with bright background - TODO confirm
    img_small = 255 - cv.resize(img, None, fx=sc, fy=sc, interpolation=order)

    crops = {}
    for crop_dim in crop_dims:
        if crop_dim not in (0, 1):
            raise ValueError('not supported dimension: %i' % crop_dim)
        img_edge = project_object_edge(img_small, crop_dim)
        begin, end = find_largest_object(img_edge, threshold=TISSUE_CONTENT)
        # the range was found on the small image, so project it (and the
        # padding) back to full-resolution pixels and clip to the image
        pad_px = padding * (end - begin) * scale_factor
        begin_px = max(0, int((begin * scale_factor) - pad_px))
        end_px = min(img.shape[crop_dim], int((end * scale_factor) + pad_px))
        crops[crop_dim] = (begin_px, end_px)
    del img_small

    # every dimension that was not requested keeps its full extent; both
    # entries have to exist for the slicing below
    for dim in range(2):
        if dim not in crops:
            crops[dim] = (0, img.shape[dim])

    img = img[crops[0][0]:crops[0][1], crops[1][0]:crops[1][1], ...]

    save_large_image(img_path, img)
    # release the large arrays before the next image is processed
    gc.collect()
    time.sleep(1)
def main(path_images, padding, nb_workers):
    """ crop all images matching the given pattern

    :param str path_images: glob pattern selecting the images
    :param float padding: padding around detected tissue, forwarded to `crop_image`
    :param int nb_workers: nb jobs running in parallel
    """
    found = sorted(glob.glob(path_images))
    if found:
        cropper = partial(crop_image, padding=padding)
        jobs = iterate_mproc_map(cropper, found, desc='Crop image tissue', nb_workers=nb_workers)
        # the results are consumed only to drive the iteration, values are discarded
        for _ in jobs:
            pass
    else:
        logging.info('No images found on "%s"', path_images)
if __name__ == '__main__':
    # show INFO messages, the script reports its progress through logging
    logging.basicConfig(level=logging.INFO)
    cli_params = arg_parse_params()
    logging.info('running...')
    # the parsed parameters are passed to `main` as keyword arguments
    main(**cli_params)
    logging.info('DONE')