Diff of /pathaia/util/images.py [000000] .. [7823dd]

Switch to unified view

a b/pathaia/util/images.py
1
# coding: utf8
2
"""Useful functions for images."""
3
import numpy
4
from skimage.io import imread
5
from skimage.transform import resize
6
from .paths import imfiles_in_folder
7
from .types import NDBoolMask, PathLike, NDImage, NDByteImage, Coord
8
from ..patches.compat import convert_coords
9
import itertools
10
from typing import Iterator, List, Tuple, Sequence, Optional, Union, Any
11
from nptyping import NDArray, Shape, Float
12
13
14
def regular_grid(shape: Coord, interval: Coord, psize: Coord) -> Iterator[Coord]:
    """
    Iterate over the positions of a regular grid covering a window.

    The distance between two consecutive positions along an axis is
    ``interval + psize`` for that axis. Positions are produced row by row:
    y is the outer loop, x the inner one.

    Arguments:
        shape: (x, y) shape of the window to tile.
        interval: (x, y) gap added to the patch size to obtain the grid step.
        psize: (w, h) size of the patches (in pixels).

    Yields:
        (x, y) positions on a regular grid.

    """
    # NOTE(review): the arithmetic below relies on the objects returned by
    # convert_coords supporting element-wise +, -, * and / -- confirm there.
    patch_size = convert_coords(psize)
    gap = convert_coords(interval)
    window = convert_coords(shape)
    stride = gap + patch_size
    stop_x, stop_y = stride * ((window - patch_size) / stride + 1)
    xs = numpy.arange(start=0, stop=stop_x, step=stride[0], dtype=int)
    ys = numpy.arange(start=0, stop=stop_y, step=stride[1], dtype=int)
    for y in ys:
        for x in xs:
            yield Coord(x=x, y=y)
36
37
38
def get_coords_from_mask(
    mask: NDBoolMask, shape: Coord, interval: Coord, psize: Coord
) -> Iterator[Coord]:
    """
    Iterate over grid positions selected by a tissue mask.

    The mask is resized so that it has one cell per grid position, then one
    position is produced for every non-zero cell of the resized mask.

    Arguments:
        mask: binary mask where tissue is marked as True.
        shape: (x, y) shape of the window to tile.
        interval: (x, y) gap added to the patch size to obtain the grid step.
        psize: (w, h) size of the patches (in pixels).

    Yields:
        The grid step multiplied by the (column, row) index of each
        non-zero cell of the resized mask.
    """
    # NOTE(review): relies on the objects returned by convert_coords
    # supporting element-wise arithmetic -- confirm in ..patches.compat.
    patch_size = convert_coords(psize)
    gap = convert_coords(interval)
    window = convert_coords(shape)
    stride = gap + patch_size
    grid_w, grid_h = (window - patch_size) / stride + 1
    # skimage expects (rows, cols), hence height first.
    grid_mask = resize(mask, (grid_h, grid_w))
    yield from (stride * (column, row) for row, column in numpy.argwhere(grid_mask))
62
63
64
def unlabeled_regular_grid_list(
    shape: Coord, step: int, psize: Optional[int] = None
) -> List[Coord]:
    """
    Get a regular grid of patch positions inside a window.

    Only positions whose patch fits entirely inside the window are returned;
    the result is empty when the window is smaller than a patch.

    Args:
        shape: shape (i, j) of the window to tile.
        step: steps in pixels between patch samples.
        psize: size of the side of the patch (in pixels). Defaults to
            ``step`` (non-overlapping tiling) when omitted.

    Returns:
        Positions (i, j) on the regular grid, in row-major order.

    """
    if psize is None:
        psize = step
    # Number of patches fitting along each axis: the last valid origin is the
    # largest multiple of step that is <= dimension - psize. Integer floor
    # division avoids float rounding; max() handles windows smaller than psize.
    n_lines = max(0, (shape[0] - psize) // step + 1)
    n_cols = max(0, (shape[1] - psize) // step + 1)
    line = numpy.arange(start=0, stop=step * n_lines, step=step, dtype=int)
    col = numpy.arange(start=0, stop=step * n_cols, step=step, dtype=int)
    return list(itertools.product(line, col))
82
83
84
def images_in_folder(
    folder: PathLike,
    authorized: Sequence[str] = (".png", ".jpg", ".jpeg", ".tif", ".tiff"),
    forbiden: Sequence[str] = ("thumbnail",),
    randomize: bool = False,
    datalim: Optional[int] = None,
    paths: bool = False,
) -> Iterator[Union[NDByteImage, Tuple[str, NDByteImage]]]:
    """
    Lazily read the images found in a folder.

    The files to read are the ones listed by ``imfiles_in_folder`` for the
    given arguments; each of them is loaded with ``skimage.io.imread``.

    Args:
        folder: absolute path to an image directory.
        authorized: authorized image file extensions.
        forbiden: non-authorized words in file names.
        randomize: whether to randomize output list of files.
        datalim: maximum number of file to extract in folder.
        paths: whether to return the file path along with the image data.

    Yields:
        The image read from each file, or a ``(path, image)`` pair when
        ``paths`` is true.

    """
    selected = imfiles_in_folder(folder, authorized, forbiden, randomize, datalim)
    for path in selected:
        pixels = imread(path)
        yield (path, pixels) if paths else pixels
115
116
117
def sample_img(
    image: NDImage,
    psize: int,
    spl_per_image: int,
    mask: Optional[NDBoolMask] = None,
) -> List[NDArray[Shape["N"], Float]]:
    """
    Sample flattened square patches from an image.

    Without a mask, candidate patches tile the image without overlap. With a
    mask, every positive mask pixel that is far enough from the image border
    is a candidate patch centre. Candidates are shuffled with
    ``numpy.random.shuffle`` and at most ``spl_per_image`` of them are kept.

    Args:
        image: numpy image to sample from.
        psize: size in pixels of the side of a patch.
        spl_per_image: maximum number of patches to extract in image.
        mask: optional boolean array, we sample in true pixels if provided.

    Returns:
        Patches of the image, each converted to float and flattened.

    """
    img = image.astype(float)
    di, dj = image.shape[0], image.shape[1]
    if mask is None:
        # Non-overlapping tiling: the step between patches is the patch size.
        grid = unlabeled_regular_grid_list((di, dj), psize, psize)
        # Keep only origins whose patch lies fully inside the image, so that
        # every flattened patch has the same length.
        positions = [
            (i, j) for i, j in grid if i + psize <= di and j + psize <= dj
        ]
    else:
        half_size = int(0.5 * psize)
        cropped_mask = numpy.zeros_like(mask)
        cropped_mask[mask > 0] = 1
        # Discard centres too close to a border for a full patch to fit.
        cropped_mask[0 : half_size + 1, :] = 0
        cropped_mask[di - half_size - 1 : :, :] = 0
        cropped_mask[:, 0 : half_size + 1] = 0
        cropped_mask[:, dj - half_size - 1 : :] = 0
        y, x = numpy.where(cropped_mask > 0)
        # Convert patch centres to patch top-left corners.
        y -= half_size
        x -= half_size
        positions = list(zip(y, x))

    numpy.random.shuffle(positions)
    positions = positions[0:spl_per_image]
    return [img[i : i + psize, j : j + psize].reshape(-1) for i, j in positions]
155
156
157
def sample_img_sep_channels(
    image: NDByteImage,
    psize: int,
    spl_per_image: int,
    mask: Optional[NDBoolMask] = None,
) -> Tuple[List[NDArray[Shape["N"], Float]], ...]:
    """
    Sample flattened square patches from an image, channel by channel.

    Without a mask, candidate patches tile the image without overlap. With a
    mask, every positive mask pixel that is far enough from the image border
    is a candidate patch centre. Candidates are shuffled with
    ``numpy.random.shuffle`` and at most ``spl_per_image`` of them are kept.
    The same positions are used for every channel.

    Args:
        image: numpy image to sample from; its last axis indexes channels.
        psize: size in pixels of the side of a patch.
        spl_per_image: maximum number of patches to extract in image.
        mask: optional boolean array, we sample in true pixels if provided.

    Returns:
        One list of flattened float patches per channel.

    """
    img = image.astype(float)
    n_channels = image.shape[-1]
    di, dj = image.shape[0], image.shape[1]
    if mask is None:
        # Non-overlapping tiling: the step between patches is the patch size.
        grid = unlabeled_regular_grid_list((di, dj), psize, psize)
        # Keep only origins whose patch lies fully inside the image, so that
        # every flattened patch has the same length.
        positions = [
            (i, j) for i, j in grid if i + psize <= di and j + psize <= dj
        ]
    else:
        half_size = int(0.5 * psize)
        cropped_mask = numpy.zeros_like(mask)
        cropped_mask[mask > 0] = 1
        # Discard centres too close to a border for a full patch to fit.
        cropped_mask[0 : half_size + 1, :] = 0
        cropped_mask[di - half_size - 1 : :, :] = 0
        cropped_mask[:, 0 : half_size + 1] = 0
        cropped_mask[:, dj - half_size - 1 : :] = 0
        y, x = numpy.where(cropped_mask > 0)
        # Convert patch centres to patch top-left corners.
        y -= half_size
        x -= half_size
        positions = list(zip(y, x))

    numpy.random.shuffle(positions)
    positions = positions[0:spl_per_image]
    return tuple(
        [img[i : i + psize, j : j + psize, c].reshape(-1) for i, j in positions]
        for c in range(n_channels)
    )