slideslicer / Git / [5a7589] /scripts/convert_roi

Models:
DanielG/
slideslicer
Downloads: 1
[5a7589]: / scripts / convert_roi_sandbox.py
History
Download this file
633 lines (479 with data), 19.4 kB

# coding: utf-8

#cell#

from bs4 import BeautifulSoup
import numpy as np
from collections import Counter
import pandas as pd
import re
import json
# import pyaml

#cell#

# see https://github.com/bgilbert/anonymize-slide

#cell#

#cell#

def get_ellipse_points(verticeslist, num=200):
    a,b = tuple(abs(verticeslist[1] - verticeslist[0])/2)
    xc, yc = tuple(abs(verticeslist[1] + verticeslist[0])/2)
    tt = np.arange(0,2*np.pi, 2*np.pi/num)
    x = xc + a*np.cos(tt)
    y = yc + b* np.sin(tt)
    return np.c_[x,y].tolist()
#     return ((x),list(y))

#cell#

def get_vertices(region):
    verticeslist = [cc for cc in region.vertices.children if cc!='\n']
    verticeslist = [(vv.get('x'), vv.get('y')) for vv in verticeslist]
    verticeslist = [(float(x), float(y)) for x,y in verticeslist]
#     verticeslist = np.asarray(verticeslist)
    if region["type"] == '2':
        verticeslist = get_ellipse_points(np.asarray(verticeslist), num=200)
    return verticeslist

# ## functions for rotating, slicing, and stitching the picture

#cell#

import openslide
from PIL import Image, ImageDraw
import cv2
import numpy as np

#cell#

def convert_contour2mask(roi, width, height, fill=1, shape='polygon', radius=3):
    img = Image.new('L', (width, height), 0)
    if len(roi)>1 and shape=='polygon':# roi.shape[0]>1:
        roi = [tuple(x) for x in roi]
        ImageDraw.Draw(img).polygon(roi, outline=fill, fill=fill)
        mask = np.asarray(img, dtype='uint8')
    else:
        ImageDraw.Draw(img).point(roi, fill=255)
        img = img.filter(ImageFilter.GaussianBlur(radius=radius))
        thr = np.exp(-0.5)*img.getextrema()[1]
        mask = fill*np.asarray(np.asarray(img,)>thr, dtype='uint8')
    return mask

#cell#

def map_countour_bbox(contour, slide_dimensions,
                     SUBSAMPLE_RATE=8):
    
    xmin_macro, ymin_macro = np.min(contour, axis=0)
    xmax_macro, ymax_macro = np.max(contour, axis=0)
    
    xminf, xmaxf = 1.0*xmin_macro/macro.shape[1], 1.0*xmax_macro/macro.shape[1]
    yminf, ymaxf = 1.0*ymin_macro/macro.shape[0], 1.0*ymax_macro/macro.shape[0]
    xtotf = xmaxf - xminf
    ytotf = ymaxf - yminf
    print("xtotf", xtotf)
    print("ytotf", ytotf)

    # xminf*slide.dimensions[1]
    ymin_hr = int(round(yminf*slide_dimensions[1]))
    xmin_hr =  int(round(xminf*slide_dimensions[0]))
    ymax_hr = int(round(ymaxf*slide_dimensions[1]))
    xmax_hr =  int(round(xmaxf*slide_dimensions[0]))
    
    ytot_hr = ymax_hr - ymin_hr
    xtot_hr = xmax_hr - xmin_hr
    print(xtot_hr, ytot_hr)
    
    
    loc_hr = (xmin_hr, ymin_hr)
    size_hr = (xtot_hr, ytot_hr)
    size_hr = [SUBSAMPLE_RATE*(ss//SUBSAMPLE_RATE) for ss in size_hr]
    level_hr = 0
    
    img_hr = slide.read_region(loc_hr, level_hr, size_hr)
    target_size = [s//SUBSAMPLE_RATE for s in img_hr.size]
    print("target_size", target_size)

    img_hr.thumbnail(target_size, Image.ANTIALIAS)
    return img_hr

#cell#

def get_median_color(slide):
    return np.apply_over_axes(np.median, 
                              np.asarray(slide.associated_images["thumbnail"]),
                              [0,1]).ravel()

#cell#

def get_threshold_tissue_mask(img, color=False, filtersize=7,
                   lower = [0, 0, 180],
                   upper = [179, 20, 255]):
    
    kernel = (filtersize,filtersize)
    if color:
        # For HSV, Hue range is [0,179], Saturation range is [0,255] and Value range is [0,255]. 
        imghsv = cv2.cvtColor(img,  cv2.COLOR_BGR2HSV)
        imghsv[:,:,-1] = cv2.GaussianBlur(imghsv[:,:,-1],kernel,0)
        imghsv[:,:,1] = cv2.GaussianBlur(imghsv[:,:,1],kernel,0)
        lower = np.asarray(lower)
        upper = np.asarray(upper)
        mask = cv2.inRange(imghsv, lower, upper)
    else:
        imgavg = np.mean(img, axis=-1).astype('uint8')
        blur = cv2.GaussianBlur(imgavg,kernel,0)
        ret3, mask = cv2.threshold(blur,0,1,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
    return (~mask.astype(bool)).astype('uint8')

def convert_mask2contour(mask, minlen = 50):
    im2, contours, hierarchy = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    
    if minlen is not None:
        contours = [np.squeeze(x) for x in contours if x.shape[0]>minlen]
    return contours

def get_chunk_countours(img, color=False, filtersize=7, minlen = 100):
    mask = get_threshold_tissue_mask(img, color=color, filtersize=filtersize)
    contours = convert_mask2contour(mask, minlen = minlen)
    return contours

#cell#

def get_thumbnail_magnification(slide):
    """get ratio of magnified / thumbnail dimension
    assumes no isotropic scaling (indeed it is slightly anisotropic)"""
    ratio = np.asarray(slide.dimensions) / np.asarray(slide.associated_images["thumbnail"].size)
     # np.sqrt(np.prod(ratio))
    return ratio

def roi_loc(roi):
    xmin, ymin = roi.min(0)
    xmax, ymax = roi.max(0)
    return np.r_[xmin, ymin], np.r_[xmax-xmin, ymax-ymin]

#cell#

def get_contour_centre(vertices):
    mmnts = cv2.moments(np.asarray(vertices, dtype='int32'))
    cX = int(mmnts["m10"] / mmnts["m00"])
    cY = int(mmnts["m01"] / mmnts["m00"])
    return (cX, cY)

#cell#

class CropRotateRoi():
    def __init__(self, img, co, enlarge=1.00,
                 use_offset=None, borderValue=None,
                 rotation_matrix=None, angle=None):
        
        self.borderValue = np.asarray(borderValue,dtype=int) if borderValue is not None else None
        co = np.asarray(co, dtype=int)
        self.use_offset = use_offset
        if use_offset:
            self.offset = co.min(0)
            co -= self.offset
            print("offset", self.offset)
        # calculate the affine transformation
        self.enlarge = enlarge
        if isinstance(img, np.ndarray):
            height = img.shape[0]
            width = img.shape[1]
        elif isinstance(img, Image.Image):
            width = img.size[0]
            height = img.size[1]
        elif isinstance(img, (tuple,list)):
            width, height = img
        #self.img_size = (int(self.enlarge*height),int(self.enlarge*width), )
        if rotation_matrix is None:
            self.rotation_matrix, self.angle = CropRotateRoi.get_rotation_matrix(co, angle=angle)
        else:
            self.rotation_matrix = rotation_matrix
            self.angle = angle
            
        self.affine_matrix = self.rotation_matrix
        # calculate offset
        offset = (self.apply_roi(co, use_offset=False).min(0))#.astype(int)
 
        matrix_full = CropRotateRoi._pad_affine_matrix_(self.rotation_matrix)
        # Combine matrices
        self.transl_matrix = np.eye(3) - np.pad(offset[:,np.newaxis], ((0,1),(2,0)),
                               mode='constant', constant_values=0)
        self.affine_matrix = self.transl_matrix.dot(matrix_full)[:2,:]
        self.img_size = tuple(1+(self.apply_roi(co, use_offset=False).max(0)).astype(int)) #[::-1]

    @classmethod
    def get_rotation_matrix(self, co, angle=None):
        xdim, ydim, angle_ = cv2.minAreaRect(co)
        if angle is not None:
            angle_ = angle
        
        box = cv2.boxPoints((xdim, ydim, angle_)) 
        xmin, ymin = box.min(0)
        xmax, ymax = box.max(0)
        rect_center = ((xmin+xmax)/2,(ymin+ymax)/2)

        return cv2.getRotationMatrix2D(rect_center, angle_, 1.0), angle_
    @classmethod
    def _pad_affine_matrix_(cls, matrix):
        matrix_full = np.pad(matrix, ((0,1),(0,0)),
                       mode='constant', constant_values=0)
        matrix_full[-1,-1] = 1
        return matrix_full
    @classmethod
    def _pad_vectors_(cls, vectors):
        return np.pad(vectors, 
                      ((0,0), (0,1)),
                      mode='constant',
                      constant_values=1)
        
    def apply_img(self, img, crop=True, borderValue=None):
        if borderValue:
            self.borderValue = np.asarray(borderValue,dtype=np.uint8)#.tolist()

        borderMode = cv2.BORDER_TRANSPARENT
#         img_size = tuple(self.maxbound[::-1])
        print("img_size", self.img_size)
        "perform the affine transformation on an image"
        rotated = cv2.warpAffine(np.asarray(img), self.affine_matrix, 
                                 self.img_size, cv2.INTER_CUBIC,
                                 borderMode = borderMode,
                                )
        "fill the void"
        transparentmask = rotated[:,:,3] == 0
        transparentmask = np.broadcast_to(transparentmask[:,:,np.newaxis], rotated.shape)
        rotated = rotated+ self.borderValue.astype('uint8').reshape(1,1,-1) * transparentmask.astype('uint8')
        return rotated
    
    def apply_roi(self, co, use_offset=True):
        "perform the affine transformation on a contour"
        if use_offset and self.use_offset and hasattr(self, 'offset'):
            co -= self.offset
#         pad_contour = np.pad(co, 
#                               ((0,0), (0,1)),
#                               mode='constant',
#                               constant_values=1)
        
        pad_contour = CropRotateRoi._pad_vectors_(co)
        crop_contour = pad_contour.dot(self.affine_matrix.T)
        return crop_contour
    
    def __call__(self, *args, crop=True):
        out = []
        for aa in args:
            if isinstance(aa, Image.Image):
                out.append(self.apply_img(aa, crop=crop))
            elif len(aa.shape)==2 and aa.shape[1]==2:
                out.append(self.apply_roi(aa))
            elif len(aa.shape)>=2 and aa.shape[1]>2:
                out.append(self.apply_img(aa, crop=crop))
            else:
                out.append(None)
        if len(args)==1:
            return out[0]
        else:
            return out

#cell#

def plot_roi(roi, **kwargs):
    roi = np.asarray(roi)
    return plt.plot(roi[:,0], roi[:,1], **kwargs)

#cell#

def within_roi(vertices, roi_start, roi_size,):
    left = (vertices>=roi_start).all(0).all()
    right = (vertices<=(roi_start + roi_size)).all(0).all()
    return left and right

def transform_roi_to_rotated_chunk(transform_mag, rr, roi_start, roi_size,):
    vertices = np.asarray(rr["vertices"])
#     print(rr["name"], left, right)
    if within_roi(vertices, roi_start, roi_size,):
#         print(rr["name"])
        roi = rr.copy()
        centroid = get_contour_centre(roi["vertices"])
        roi["centroid_within_slice"] = within_roi(centroid, roi_start, roi_size,)
        roi["vertices"] = transform_mag.apply_roi(np.asarray(rr["vertices"]), use_offset=True)
        if (roi["vertices"]<0).all(0).any():
            return None
        else:
            return roi

#cell#

def get_rotated_highres_roi(slide, chunkroi_small, 
                    feature_rois = [],
                    color=True, filtersize=35, minlen=500,
                    median_color=None, angle=None
                    ):
    if median_color is None:
        median_color = get_median_color(slide)
    ratio = get_thumbnail_magnification(slide)
    
    roi_start, roi_size = roi_loc(chunkroi_small)
    region = slide.read_region((roi_start*ratio).astype(int), 0, 
                               (roi_size*ratio).astype(int))

    transform_mag = CropRotateRoi(region, chunkroi_small*ratio,
                          angle=angle,
                          use_offset=True,
                          borderValue=median_color)
    region_ = transform_mag(region,
#                             crop=True
                           )
    # transform feature rois:
    rois_within_chunk = []
    for rr in feature_rois:
        rr = transform_roi_to_rotated_chunk(transform_mag, rr, 
                                            roi_start*ratio, roi_size*ratio,)
        if rr is not None:
            print(rr["name"])
            rois_within_chunk.append(rr)
    # re-estimate the contour
    mask_ = get_threshold_tissue_mask(region_, color=color, filtersize=filtersize)
    chunkroi_large_refined = convert_mask2contour(mask_, minlen = minlen)
    assert len(chunkroi_large_refined)>0
    # take the longest contour
    maxidx = np.argmax([len(x) for x in chunkroi_large_refined])
    chunkroi_large_refined = chunkroi_large_refined[maxidx]
    return transform_roi_to_rotated_chunk, region_, chunkroi_large_refined, rois_within_chunk

#cell#

def get_roi_dict(contour, name='tissue', id=0, sq_micron_per_pixel=None):
    """input: 
        contour: numpy array
    """
    cdict = {'id':id, 
            'name': name,
            'vertices':contour.tolist(),
           'area': cv2.contourArea(np.asarray(roi["vertices"], dtype='int32'))
           }
    if sq_micron_per_pixel:
        cdict['areamicrons'] = micron_per_pixel*cdict['area']
    return cdict

# ## Read XML ROI, convert, and save as JSON

#cell#
if __name__ == '__main__':

    fnxml = "examples/6371/6371 1.xml"
    fnjson = re.sub(".xml$", ".json", fnxml)
# fnyaml = re.sub(".xml$", ".yaml", fnxml)
    with open(fnxml) as fh:
        soup = BeautifulSoup(fh, 'lxml')

#cell#

    regions = soup.find_all("region")

#cell#

    roilist = []
    for rr in regions:
#     name = rr.get("text").lower().rstrip('.')
        attrs_ = rr.attrs.copy()
        if ("text" in attrs_) and not ("name" in attrs_):
            attrs_["name"] = attrs_.pop("text").lower().rstrip('.')
        for kk,vv in attrs_.items():
            if isinstance(vv,str) and vv.isdigit():
                attrs_[kk] = int(vv)
            else:
                try:
                    attrs_[kk] = float(vv)
                except:
                    if attrs_[kk]=='':
                        attrs_[kk]=None
                    continue
        attrs_["vertices"] = get_vertices(rr)
#     dict(name=name, vertices = get_vertices(rr))
        roilist.append(attrs_)

# for an ellipse, 
# 
#    area = $\pi \times r \times R$

#cell#

# ellipses = [{"vertices":get_vertices(rr), "length": rr["length"], "area":rr["area"]} for rr in regions if rr["type"]=='2']
# ell = ellipses[1]

#cell#

    pd.Series([rr["name"] for rr in roilist]).value_counts()

#cell#

    with open(fnjson, 'w+') as fh:
        json.dump(roilist, fh)

#cell#

# with open(fnyaml, 'w+') as fh:
#     pyaml.dump(roilist, fh)

#cell#

    from matplotlib import pyplot as plt
    get_ipython().magic('matplotlib inline')

#cell#

    for rr in roilist:
        plt.plot([x for x,_ in  rr["vertices"]],
                 [y for _,y in  rr["vertices"]])

# ## Extract tissue chunk ROIs

#cell#

    fnsvs = "examples/6371/6371 1.svs"

#cell#

    slide = openslide.OpenSlide(fnsvs)
    img = np.asarray(slide.associated_images["thumbnail"])

    median_color = get_median_color(slide)

#cell#

    slide.associated_images.keys()

#cell#

    for kk,vv in slide.associated_images.items():
        print(kk, vv.size)

#cell#

    plt.imshow(img)

# ## Extract mask and contours

#cell#

    mask = get_threshold_tissue_mask(img, color=False, filtersize=7)
    contours = convert_mask2contour(mask, minlen = 100)

#cell#

    sq_micron_per_pixel = np.median([roi["areamicrons"] / roi["area"] for roi in roilist])

    tissue_roilist = [get_roi_dict(cc, name='tissue', id=nn+len(roilist), sq_micron_per_pixel=sq_micron_per_pixel) 
                          for nn,cc in enumerate(contours)]

#cell#

# Save both contour lists together

    with open(fnjson, 'w+') as fh:
        json.dump(roilist + tissue_roilist, fh)

# ### At this point we are done with basic contour extraction. It is enough for the first script 

#cell#

    plt.imshow(mask)

#cell#

#cell#

    plt.figure(figsize = (18,10))
    plt.imshow(img)
    for co in contours:
        plt.plot(co[:,0], co[:,1])

#cell#

    img.shape

#cell#

# objmask = np.zeros((height,width), np.uint8)
# objmask = cv2.fillPoly(objmask, [co], 1)

#cell#

    height, width, _ = img.shape
# objmask = convert_contour2mask(co, width, height, fill=1, shape='polygon', radius=3)

# # Rotating, slicing, and stitching the picture
# ## crop and rotate the image and a chunk roi

#cell#

# select a chunk roi
    co = contours[2]

# create a transformer object
    cr = CropRotateRoi(img, co, enlarge=1.0, borderValue=median_color)

# apply transformation
    crimg = cr(img, crop=False)
    crroi = cr(co)

#cell#

    plt.figure(figsize=(15,2))
    plt.imshow(crimg)
    plt.plot(crroi[:,0], crroi[:,1])

# ## Now we can slice the chunk horizonatally, and stack highres images from the slices
# 
# As chunks are often huge and oriented diagonally, reading the bounding box around the ROI naively would involve reading huge number of pixels most of which are blank anyways. It is often not feasible in terms of RAM.

#cell#

    np.ceil(crimg.shape[1]/int(np.ceil(crimg.shape[1]/100)))

#cell#

    stepsize = 100
    nsteps = int(np.ceil(crimg.shape[1]/stepsize))
    print(nsteps)

#cell#

    def get_img_bbox(img):
        h = img.shape[0]
        w = img.shape[1]
        return np.c_[[0,0],[w,0],[w,h], [0,h]].T

# ## Read and stitch slices

#cell#

    ratio = get_thumbnail_magnification(slide)
# for step in range(nsteps):
    regions = []
    reg_rois = []

    chunk_height_tr_large = int(np.ceil(crimg.shape[0]*max(ratio)))

    tight = False

    for step in range(2):
        currentchunk = crimg[:,stepsize*step:stepsize*(step+1)+1,:]
        if tight:
            "contour per se"
            chunk_contours = get_chunk_countours(currentchunk, 
                                       minlen = 10 , color=False,
                                       filtersize=7)
        else:
            "compatible for stitching with other slices"
            chunk_contours = [get_img_bbox(currentchunk)]
        slice_offset = np.r_[stepsize*step,0]
        chunk_contours += slice_offset
        assert len(chunk_contours) ==1
        chunk_contour = chunk_contours[0]
        
        chunk_slice_contour_origcoord = np.linalg.solve(CropRotateRoi._pad_affine_matrix_(cr.affine_matrix), 
                                                        CropRotateRoi._pad_vectors_(chunk_contour).T).T[:,:2]
        
        trnsf, region, chunk_slice_roi_origcoord_mag, rois_within_chunk = get_rotated_highres_roi(slide,
                                                                                       chunk_slice_contour_origcoord,
                                                                                       roilist,
                                                                                       angle=cr.angle
                                                                                       )
        regions.append(region)
        for roi_ in rois_within_chunk:
            roi = roi_.copy()
            roi["vertices"] += slice_offset*ratio
            reg_rois.append(roi)

#cell#

    [rr.shape for rr in regions]

#cell#

    fig, axs = plt.subplots(1,len(regions), figsize=(16, 8))
    for ax,reg in zip(axs, regions):
        ax.imshow(reg)

#cell#

    fig, axs = plt.subplots(1, figsize=(16, 8))

    plt.imshow(np.hstack(*[regions],))
#.shape
    for roi in reg_rois:
        plot_roi(roi["vertices"] )

#cell#

    plt.imshow(img)
    plot_roi( chunk_slice_contour_origcoord )

#cell#

    plt.imshow(currentchunk)
    plot_roi(chunk_contour-slice_offset)

#cell#

# plot_roi(chunk_slice_contour_origcoord)

#cell#

    plt.imshow(region)
    plot_roi(chunk_slice_roi_origcoord_mag)
    for rr in rois_within_chunk:
        plot_roi(rr["vertices"])

#cell#