--- a +++ b/slideflow/slide/qc/otsu.py @@ -0,0 +1,198 @@ +"""Otsu's thresholding QC algorithm.""" + +import cv2 +import numpy as np +import slideflow as sf +import rasterio +import shapely.affinity as sa +from slideflow import errors +from typing import Union, Optional + + +def _apply_mask(image, mask): + resized_mask = cv2.resize( + (~mask).astype(np.uint8), + (image.shape[1], image.shape[0]), + interpolation=cv2.INTER_NEAREST + ) + return cv2.bitwise_or(image, image, mask=resized_mask) + + +def _get_level_for_otsu(wsi: "sf.WSI", min_size: int = 500) -> int: + """Find the smallest downsample level of a minimum size.""" + smallest_dim = np.array([min(L['dimensions']) for L in wsi.levels]) + level_ids = np.array([L['level'] for L in wsi.levels]) + sorted_idx = np.argsort(smallest_dim) + try: + best_idx = np.where(smallest_dim[sorted_idx] > min_size)[0][0] + except IndexError: + # If the slide is smaller than the target minimum dimension, + # use the full slide image + best_idx = sorted_idx[-1] + return level_ids[sorted_idx][best_idx] + +# ----------------------------------------------------------------------------- + +class Otsu: + + def __init__(self, slide_level: Optional[int] = None): + """Prepare Otsu's thresholding algorithm for filtering a slide. + + This method is used to detect areas of tissue and remove background. + + This QC method works by obtaining a thumbnail of a slide, and converting + the image into the HSV color space. The HSV image undergoes a median blur + using OpenCV with a kernel size of 7, and the image is thresholded + using ``cv2.THRESH_OTSU``. This results in a binary mask, which + is then applied to the slide for filtering. + + Original paper: https://ieeexplore.ieee.org/document/4310076 + + .. warning:: + + Otsu's thresholding may give unexpected results with slides + that have many large pen marks, erroneously identifying pen marks + as tissue and removing the actual tissue as background. + This behavior can be circumvented by applying a Gaussian filter + before Otsu's thresholding. + + .. code-block:: python + + import slideflow as sf + from slideflow.slide import qc + + wsi = sf.WSI(...) + gaussian = qc.GaussianV2() + otsu = qc.Otsu() + wsi.qc([gaussian, otsu]) + + Examples + Apply Otsu's thresholding to a slide. + + .. code-block:: python + + import slideflow as sf + from slideflow.slide import qc + + wsi = sf.WSI(...) + otsu = qc.Otsu() + wsi.qc(otsu) + + Args: + level (int): Slide pyramid level at which to perform filtering. + Defaults to second-lowest available level. + """ + self.level = slide_level + + def __repr__(self): + return "Otsu(slide_level={!r})".format( + self.level + ) + + def _thumb_from_slide( + self, + wsi: "sf.WSI", + ) -> np.ndarray: + """Get a thumbnail from the given slide. + + Args: + wsi (sf.WSI): Whole-slide image. + + Returns: + np.ndarray: RGB thumbnail of the whole-slide image. + """ + if self.level is None: + # Otsu's thresholding can be done on the smallest downsample level, + # with the smallest dimension being at least 500 pixels + level = _get_level_for_otsu(wsi, min_size=500) + else: + level = self.level + + try: + if wsi.slide.has_levels: + sf.log.debug("Applying Otsu's thresholding at level={}".format(level)) + thumb = wsi.slide.read_level(level=level, to_numpy=True) + else: + sf.log.debug("Applying Otsu's thresholding at level=None") + thumb = wsi.slide.read_level(to_numpy=True) + except Exception as e: + raise errors.QCError( + f"Thumbnail error for slide {wsi.shortname}, QC failed: {e}" + ) + if thumb.shape[-1] == 4: + thumb = thumb[:, :, :3] + + # Only apply Otsu thresholding within ROI, if present + # If ROI is the ROI_issues, invert it + if wsi.has_rois(): + ofact = 1 / wsi.slide.level_downsamples[level] + roi_mask = np.zeros((thumb.shape[0], thumb.shape[1])) + + # Scale ROIs to thumbnail size + scaled_polys = wsi._scale_polys( + [roi.poly for roi in wsi.get_rois(ignore_artifact=True)], + xfact=ofact, + yfact=ofact, + ) + scaled_issues_polys = wsi._scale_polys( + [roi.invert(*wsi.dimensions).poly for roi in wsi.get_artifacts()], + xfact=ofact, + yfact=ofact, + ) + # Rasterize scaled ROIs + if len(scaled_polys) > 0: + roi_mask = rasterio.features.rasterize( + scaled_polys, + out_shape=thumb.shape[:2] + ) + if len(scaled_issues_polys) > 0: + roi_mask_issues = rasterio.features.rasterize( + scaled_issues_polys, + out_shape=thumb.shape[:2] + ) + # If there are artifacts, remove them from the ROI mask + if len(scaled_polys) > 0: + roi_mask = np.minimum(roi_mask_issues, roi_mask) + else: + roi_mask = roi_mask_issues + + if wsi.roi_method == 'outside': + roi_mask = ~roi_mask + thumb = cv2.bitwise_or( + thumb, + thumb, + mask=roi_mask.astype(np.uint8) + ) + # Only apply Otsu thresholding within areas not already removed + # with other QC methods. + if wsi.has_non_roi_qc(): + thumb = _apply_mask(thumb, wsi.get_qc_mask(roi=False)) + return thumb + + def __call__( + self, + wsi: Union["sf.WSI", np.ndarray], + mask: Optional[np.ndarray] = None, + ) -> np.ndarray: + """Perform Otsu's thresholding on the given slide or image. + + Args: + slide (sf.WSI, np.ndarray): Either a Slideflow WSI or a numpy array, + with shape (h, w, c) and type np.uint8. + mask (np.ndarray): Restrict Otsu's threshold to the area of the + image indicated by this boolean mask. Defaults to None. + + Returns: + np.ndarray: QC boolean mask, where True = filtered out. + """ + if isinstance(wsi, sf.WSI): + thumb = self._thumb_from_slide(wsi) + else: + thumb = wsi + if mask is not None: + thumb = _apply_mask(thumb, mask) + hsv_img = cv2.cvtColor(thumb, cv2.COLOR_RGB2HSV) + img_med = cv2.medianBlur(hsv_img[:, :, 1], 7) + flags = cv2.THRESH_OTSU+cv2.THRESH_BINARY_INV + _, otsu_mask = cv2.threshold(img_med, 0, 255, flags) + return otsu_mask.astype(bool) \ No newline at end of file