Diff of /pathaia/util/images.py [000000] .. [7823dd]

Switch to side-by-side view

--- a
+++ b/pathaia/util/images.py
@@ -0,0 +1,204 @@
+# coding: utf8
+"""Useful functions for images."""
+import numpy
+from skimage.io import imread
+from skimage.transform import resize
+from .paths import imfiles_in_folder
+from .types import NDBoolMask, PathLike, NDImage, NDByteImage, Coord
+from ..patches.compat import convert_coords
+import itertools
+from typing import Iterator, List, Tuple, Sequence, Optional, Union, Any
+from nptyping import NDArray, Shape, Float
+
+
+def regular_grid(shape: Coord, interval: Coord, psize: Coord) -> Iterator[Coord]:
+    """
+    Get a regular grid of position on a slide given its dimensions.
+
+    Arguments:
+        shape: (x, y) shape of the window to tile.
+        interval: (x, y) steps between patch samples.
+        psize: (w, h) size of the patches (in pixels).
+
+    Yields:
+        (x, y) positions on a regular grid.
+
+    """
+    psize = convert_coords(psize)
+    interval = convert_coords(interval)
+    shape = convert_coords(shape)
+    step = interval + psize
+    maxj, maxi = step * ((shape - psize) / step + 1)
+    col = numpy.arange(start=0, stop=maxj, step=step[0], dtype=int)
+    line = numpy.arange(start=0, stop=maxi, step=step[1], dtype=int)
+    for i, j in itertools.product(line, col):
+        yield Coord(x=j, y=i)
+
+
+def get_coords_from_mask(
+    mask: NDBoolMask, shape: Coord, interval: Coord, psize: Coord
+) -> Iterator[Coord]:
+    """
+    Get tissue coordinates given a tissue binary mask and slide dimensions.
+
+    Arguments:
+        mask: binary mask where tissue is marked as True.
+        shape: (x, y) shape of the window to tile.
+        interval: (x, y) steps between patch samples.
+        psize: (w, h) size of the patches (in pixels).
+
+    Yields:
+        (x, y) positions on a regular grid.
+    """
+
+    psize = convert_coords(psize)
+    interval = convert_coords(interval)
+    shape = convert_coords(shape)
+    step = interval + psize
+    mask_w, mask_h = (shape - psize) / step + 1
+    mask = resize(mask, (mask_h, mask_w))
+    for i, j in numpy.argwhere(mask):
+        yield step * (j, i)
+
+
+def unlabeled_regular_grid_list(shape: Coord, step: int, psize: int) -> List[Coord]:
+    """
+    Get a regular grid of position on a slide given its dimensions.
+
+    Args:
+        shape: shape (i, j) of the window to tile.
+        step: steps in pixels between patch samples.
+        psize: size of the side of the patch (in pixels).
+
+    Returns:
+        Positions (i, j) on the regular grid.
+
+    """
+    maxi = step * int((shape[0] - (psize - step)) / step) + 1
+    maxj = step * int((shape[1] - (psize - step)) / step) + 1
+    col = numpy.arange(start=0, stop=maxj, step=step, dtype=int)
+    line = numpy.arange(start=0, stop=maxi, step=step, dtype=int)
+    return list(itertools.product(line, col))
+
+
+def images_in_folder(
+    folder: PathLike,
+    authorized: Sequence[str] = (".png", ".jpg", ".jpeg", ".tif", ".tiff"),
+    forbiden: Sequence[str] = ("thumbnail",),
+    randomize: bool = False,
+    datalim: Optional[int] = None,
+    paths: bool = False,
+) -> Iterator[Union[NDByteImage, Tuple[str, NDByteImage]]]:
+    """
+    Get images in a given folder.
+
+    Get all images as numpy arrays (selected by file extension).
+    You can remove terms from the research.
+
+    Args:
+        folder: absolute path to an image directory.
+        authorized: authorized image file extensions.
+        forbiden: non-authorized words in file names.
+        randomize: whether to randomize output list of files.
+        datalim: maximum number of file to extract in folder.
+        paths: whether to return absolute path with image data.
+
+    Yields:
+        Images as numpy arrays, optionally with path.
+
+    """
+    for imfile in imfiles_in_folder(folder, authorized, forbiden, randomize, datalim):
+        if paths:
+            yield imfile, imread(imfile)
+        else:
+            yield imread(imfile)
+
+
+def sample_img(
+    image: NDImage, psize: int, spl_per_image: int, mask: NDBoolMask = None
+) -> List[NDArray[Shape["N"], Float]]:
+    """
+    Split image in patches.
+
+    Args:
+        image: numpy image to fit on.
+        psize: size in pixels of the side of a patch.
+        spl_per_image: maximum number of patches to extract in image.
+        mask: optional boolean array, we sample in true pixels if provided.
+
+    Returns:
+        Patches in the image.
+
+    """
+    img = image.astype(float)
+    spaceshape = (image.shape[0], image.shape[1])
+    di, dj = spaceshape
+    if mask is None:
+        positions = unlabeled_regular_grid_list(spaceshape, psize)
+    else:
+        half_size = int(0.5 * psize)
+        cropped_mask = numpy.zeros_like(mask)
+        cropped_mask[mask > 0] = 1
+        cropped_mask[0 : half_size + 1, :] = 0
+        cropped_mask[di - half_size - 1 : :, :] = 0
+        cropped_mask[:, 0 : half_size + 1] = 0
+        cropped_mask[:, dj - half_size - 1 : :] = 0
+        y, x = numpy.where(cropped_mask > 0)
+        y -= half_size
+        x -= half_size
+        positions = [(i, j) for i, j in zip(y, x)]
+
+    numpy.random.shuffle(positions)
+    positions = positions[0:spl_per_image]
+    patches = [img[i : i + psize, j : j + psize].reshape(-1) for i, j in positions]
+    return patches
+
+
+def sample_img_sep_channels(
+    image: NDByteImage, psize: int, spl_per_image: int, mask: NDBoolMask = None
+) -> Tuple[List[NDArray[Shape["N"], Float]], ...]:
+    """Fit vocabulary on a single image.
+
+    Split image in patches and fit on them.
+
+    Args:
+        image: numpy image to fit on.
+        psize: size in pixels of the side of a patch.
+        spl_per_image: maximum number of patches to extract in image.
+        mask: optional boolean array, we sample in true pixels if provided.
+
+    Returns:
+        Patches in the image in separated channels.
+
+    """
+    img = image.astype(float)
+    n_channels = image.shape[-1]
+    spaceshape = (image.shape[0], image.shape[1])
+    di, dj = spaceshape
+    if mask is None:
+        positions = unlabeled_regular_grid_list(spaceshape, psize)
+    else:
+        half_size = int(0.5 * psize)
+        cropped_mask = numpy.zeros_like(mask)
+        cropped_mask[mask > 0] = 1
+        cropped_mask[0 : half_size + 1, :] = 0
+        cropped_mask[di - half_size - 1 : :, :] = 0
+        cropped_mask[:, 0 : half_size + 1] = 0
+        cropped_mask[:, dj - half_size - 1 : :] = 0
+        y, x = numpy.where(cropped_mask > 0)
+        y -= half_size
+        x -= half_size
+        positions = [(i, j) for i, j in zip(y, x)]
+    numpy.random.shuffle(positions)
+    if len(positions) > spl_per_image:
+        positions = positions[0:spl_per_image]
+
+    patches = []
+    for c in range(n_channels):
+        patches.append(
+            [
+                img[:, :, c][i : i + psize, j : j + psize].reshape(-1)
+                for i, j in positions
+            ]
+        )
+    return tuple(patches)