|
a |
|
b/pathaia/util/images.py |
|
|
1 |
# coding: utf8 |
|
|
2 |
"""Useful functions for images.""" |
|
|
3 |
import numpy |
|
|
4 |
from skimage.io import imread |
|
|
5 |
from skimage.transform import resize |
|
|
6 |
from .paths import imfiles_in_folder |
|
|
7 |
from .types import NDBoolMask, PathLike, NDImage, NDByteImage, Coord |
|
|
8 |
from ..patches.compat import convert_coords |
|
|
9 |
import itertools |
|
|
10 |
from typing import Iterator, List, Tuple, Sequence, Optional, Union, Any |
|
|
11 |
from nptyping import NDArray, Shape, Float |
|
|
12 |
|
|
|
13 |
|
|
|
14 |
def regular_grid(shape: Coord, interval: Coord, psize: Coord) -> Iterator[Coord]:
    """
    Iterate over the positions of a regular grid tiling a window.

    The distance between the origins of two consecutive patches is
    ``interval + psize`` along each axis.

    Arguments:
        shape: (x, y) shape of the window to tile.
        interval: (x, y) steps between patch samples.
        psize: (w, h) size of the patches (in pixels).

    Yields:
        (x, y) positions on a regular grid; every x is visited for a given y
        before moving on to the next y.

    """
    patch_size = convert_coords(psize)
    gap = convert_coords(interval)
    window = convert_coords(shape)
    # Distance between the origins of two neighbouring patches.
    stride = gap + patch_size
    # Exclusive upper bounds for the patch origins along x and y.
    # NOTE(review): relies on the arithmetic defined by Coord (not visible here).
    stop_x, stop_y = stride * ((window - patch_size) / stride + 1)
    xs = numpy.arange(start=0, stop=stop_x, step=stride[0], dtype=int)
    ys = numpy.arange(start=0, stop=stop_y, step=stride[1], dtype=int)
    for y in ys:
        for x in xs:
            yield Coord(x=x, y=y)
|
|
36 |
|
|
|
37 |
|
|
|
38 |
def get_coords_from_mask(
    mask: NDBoolMask, shape: Coord, interval: Coord, psize: Coord
) -> Iterator[Coord]:
    """
    Iterate over the grid positions that fall on tissue.

    The mask is resized so that it holds one pixel per grid position, then
    every non-zero pixel is converted back to a position in the window.

    Arguments:
        mask: binary mask where tissue is marked as True.
        shape: (x, y) shape of the window to tile.
        interval: (x, y) steps between patch samples.
        psize: (w, h) size of the patches (in pixels).

    Yields:
        (x, y) positions on a regular grid.
    """
    patch_size = convert_coords(psize)
    gap = convert_coords(interval)
    window = convert_coords(shape)
    # Distance between the origins of two neighbouring patches.
    stride = gap + patch_size
    # Number of grid positions along x and y.
    # NOTE(review): relies on the arithmetic defined by Coord (not visible here).
    grid_w, grid_h = (window - patch_size) / stride + 1
    grid_mask = resize(mask, (grid_h, grid_w))
    # Rows index y and columns index x, hence the swap when scaling back.
    for row, column in zip(*numpy.nonzero(grid_mask)):
        yield stride * (column, row)
|
|
62 |
|
|
|
63 |
|
|
|
64 |
def unlabeled_regular_grid_list(shape: Coord, step: int, psize: int) -> List[Coord]:
    """
    Get a regular grid of position on a slide given its dimensions.

    Only positions whose patch lies entirely inside the window are returned,
    so the list is empty when the window is smaller than a patch.

    Args:
        shape: shape (i, j) of the window to tile.
        step: steps in pixels between patch samples.
        psize: size of the side of the patch (in pixels).

    Returns:
        Positions (i, j) on the regular grid, every j being listed for a
        given i before moving on to the next i.

    """
    # Number of patches that fit along each axis; clamp to 0 so a window
    # smaller than a patch produces no position at all.
    n_lines = max(0, (shape[0] - psize) // step + 1)
    n_cols = max(0, (shape[1] - psize) // step + 1)
    # The stop is exclusive, so ``n * step`` yields exactly n origins; a
    # larger stop would add a position whose patch overruns the window.
    line = numpy.arange(start=0, stop=n_lines * step, step=step, dtype=int)
    col = numpy.arange(start=0, stop=n_cols * step, step=step, dtype=int)
    return list(itertools.product(line, col))
|
|
82 |
|
|
|
83 |
|
|
|
84 |
def images_in_folder(
    folder: PathLike,
    authorized: Sequence[str] = (".png", ".jpg", ".jpeg", ".tif", ".tiff"),
    forbiden: Sequence[str] = ("thumbnail",),
    randomize: bool = False,
    datalim: Optional[int] = None,
    paths: bool = False,
) -> Iterator[Union[NDByteImage, Tuple[str, NDByteImage]]]:
    """
    Load the images found in a given folder.

    The files to load are chosen by ``imfiles_in_folder``, which receives the
    folder and the filtering options unchanged; each selected file is then
    read with ``skimage.io.imread``.

    Args:
        folder: absolute path to an image directory.
        authorized: authorized image file extensions.
        forbiden: non-authorized words in file names.
        randomize: whether to randomize output list of files.
        datalim: maximum number of file to extract in folder.
        paths: whether to return absolute path with image data.

    Yields:
        Images as numpy arrays, or ``(path, image)`` pairs when ``paths`` is
        true.

    """
    selected_files = imfiles_in_folder(folder, authorized, forbiden, randomize, datalim)
    for image_path in selected_files:
        pixels = imread(image_path)
        yield (image_path, pixels) if paths else pixels
|
|
115 |
|
|
|
116 |
|
|
|
117 |
def sample_img(
    image: NDImage, psize: int, spl_per_image: int, mask: Optional[NDBoolMask] = None
) -> List[NDArray[Shape["N"], Float]]:
    """
    Sample square patches in an image and flatten them.

    Without a mask, candidate patches are the non-overlapping tiles of a
    regular grid. With a mask, one candidate patch is taken around every
    true pixel that is far enough from the image borders. Candidates are
    shuffled with numpy's global random state and at most ``spl_per_image``
    of them are kept.

    Args:
        image: numpy image to fit on.
        psize: size in pixels of the side of a patch.
        spl_per_image: maximum number of patches to extract in image.
        mask: optional boolean array, we sample in true pixels if provided.

    Returns:
        Patches in the image, each one flattened to a 1-D float array.

    """
    img = image.astype(float)
    di, dj = image.shape[0], image.shape[1]
    if mask is None:
        # Tile the image with non-overlapping patches: the grid step is the
        # patch size itself.
        grid = unlabeled_regular_grid_list((di, dj), psize, psize)
        # Keep only the patches that fit in the image so that every
        # flattened patch has the same length.
        positions = [
            (i, j) for i, j in grid if i + psize <= di and j + psize <= dj
        ]
    else:
        half_size = int(0.5 * psize)
        cropped_mask = numpy.zeros_like(mask)
        cropped_mask[mask > 0] = 1
        # Discard pixels too close to a border for a patch to fit around them.
        cropped_mask[0 : half_size + 1, :] = 0
        cropped_mask[di - half_size - 1 : :, :] = 0
        cropped_mask[:, 0 : half_size + 1] = 0
        cropped_mask[:, dj - half_size - 1 : :] = 0
        y, x = numpy.where(cropped_mask > 0)
        # Move from the sampled pixel to the top-left corner of its patch.
        positions = list(zip(y - half_size, x - half_size))

    numpy.random.shuffle(positions)
    positions = positions[:spl_per_image]
    return [img[i : i + psize, j : j + psize].reshape(-1) for i, j in positions]
|
|
155 |
|
|
|
156 |
|
|
|
157 |
def sample_img_sep_channels(
    image: NDByteImage,
    psize: int,
    spl_per_image: int,
    mask: Optional[NDBoolMask] = None,
) -> Tuple[List[NDArray[Shape["N"], Float]], ...]:
    """
    Sample square patches in an image, channel by channel.

    Without a mask, candidate patches are the non-overlapping tiles of a
    regular grid. With a mask, one candidate patch is taken around every
    true pixel that is far enough from the image borders. Candidates are
    shuffled with numpy's global random state and at most ``spl_per_image``
    of them are kept. The same positions are used for every channel.

    Args:
        image: numpy image to fit on; its last axis is read as the channels.
        psize: size in pixels of the side of a patch.
        spl_per_image: maximum number of patches to extract in image.
        mask: optional boolean array, we sample in true pixels if provided.

    Returns:
        Patches in the image in separated channels: one list per channel,
        each patch flattened to a 1-D float array.

    """
    img = image.astype(float)
    n_channels = image.shape[-1]
    di, dj = image.shape[0], image.shape[1]
    if mask is None:
        # Tile the image with non-overlapping patches: the grid step is the
        # patch size itself.
        grid = unlabeled_regular_grid_list((di, dj), psize, psize)
        # Keep only the patches that fit in the image so that every
        # flattened patch has the same length.
        positions = [
            (i, j) for i, j in grid if i + psize <= di and j + psize <= dj
        ]
    else:
        half_size = int(0.5 * psize)
        cropped_mask = numpy.zeros_like(mask)
        cropped_mask[mask > 0] = 1
        # Discard pixels too close to a border for a patch to fit around them.
        cropped_mask[0 : half_size + 1, :] = 0
        cropped_mask[di - half_size - 1 : :, :] = 0
        cropped_mask[:, 0 : half_size + 1] = 0
        cropped_mask[:, dj - half_size - 1 : :] = 0
        y, x = numpy.where(cropped_mask > 0)
        # Move from the sampled pixel to the top-left corner of its patch.
        positions = list(zip(y - half_size, x - half_size))

    numpy.random.shuffle(positions)
    if len(positions) > spl_per_image:
        positions = positions[:spl_per_image]

    return tuple(
        [img[i : i + psize, j : j + psize, c].reshape(-1) for i, j in positions]
        for c in range(n_channels)
    )