Switch to side-by-side view

--- a
+++ b/scripts/convert_roi_sandbox.py
@@ -0,0 +1,632 @@
+
+# coding: utf-8
+
+#cell#
+
+from bs4 import BeautifulSoup
+import numpy as np
+from collections import Counter
+import pandas as pd
+import re
+import json
+# import pyaml
+
+#cell#
+
+# see https://github.com/bgilbert/anonymize-slide
+
+#cell#
+
+#cell#
+
+def get_ellipse_points(verticeslist, num=200):
+    a,b = tuple(abs(verticeslist[1] - verticeslist[0])/2)
+    xc, yc = tuple(abs(verticeslist[1] + verticeslist[0])/2)
+    tt = np.arange(0,2*np.pi, 2*np.pi/num)
+    x = xc + a*np.cos(tt)
+    y = yc + b* np.sin(tt)
+    return np.c_[x,y].tolist()
+#     return ((x),list(y))
+
+#cell#
+
+def get_vertices(region):
+    verticeslist = [cc for cc in region.vertices.children if cc!='\n']
+    verticeslist = [(vv.get('x'), vv.get('y')) for vv in verticeslist]
+    verticeslist = [(float(x), float(y)) for x,y in verticeslist]
+#     verticeslist = np.asarray(verticeslist)
+    if region["type"] == '2':
+        verticeslist = get_ellipse_points(np.asarray(verticeslist), num=200)
+    return verticeslist
+
+# ## functions for rotating, slicing, and stitching the picture
+
+#cell#
+
+import openslide
+from PIL import Image, ImageDraw
+import cv2
+import numpy as np
+
+#cell#
+
+def convert_contour2mask(roi, width, height, fill=1, shape='polygon', radius=3):
+    img = Image.new('L', (width, height), 0)
+    if len(roi)>1 and shape=='polygon':# roi.shape[0]>1:
+        roi = [tuple(x) for x in roi]
+        ImageDraw.Draw(img).polygon(roi, outline=fill, fill=fill)
+        mask = np.asarray(img, dtype='uint8')
+    else:
+        ImageDraw.Draw(img).point(roi, fill=255)
+        img = img.filter(ImageFilter.GaussianBlur(radius=radius))
+        thr = np.exp(-0.5)*img.getextrema()[1]
+        mask = fill*np.asarray(np.asarray(img,)>thr, dtype='uint8')
+    return mask
+
+#cell#
+
+def map_countour_bbox(contour, slide_dimensions,
+                     SUBSAMPLE_RATE=8):
+    
+    xmin_macro, ymin_macro = np.min(contour, axis=0)
+    xmax_macro, ymax_macro = np.max(contour, axis=0)
+    
+    xminf, xmaxf = 1.0*xmin_macro/macro.shape[1], 1.0*xmax_macro/macro.shape[1]
+    yminf, ymaxf = 1.0*ymin_macro/macro.shape[0], 1.0*ymax_macro/macro.shape[0]
+    xtotf = xmaxf - xminf
+    ytotf = ymaxf - yminf
+    print("xtotf", xtotf)
+    print("ytotf", ytotf)
+
+    # xminf*slide.dimensions[1]
+    ymin_hr = int(round(yminf*slide_dimensions[1]))
+    xmin_hr =  int(round(xminf*slide_dimensions[0]))
+    ymax_hr = int(round(ymaxf*slide_dimensions[1]))
+    xmax_hr =  int(round(xmaxf*slide_dimensions[0]))
+    
+    ytot_hr = ymax_hr - ymin_hr
+    xtot_hr = xmax_hr - xmin_hr
+    print(xtot_hr, ytot_hr)
+    
+    
+    loc_hr = (xmin_hr, ymin_hr)
+    size_hr = (xtot_hr, ytot_hr)
+    size_hr = [SUBSAMPLE_RATE*(ss//SUBSAMPLE_RATE) for ss in size_hr]
+    level_hr = 0
+    
+    img_hr = slide.read_region(loc_hr, level_hr, size_hr)
+    target_size = [s//SUBSAMPLE_RATE for s in img_hr.size]
+    print("target_size", target_size)
+
+    img_hr.thumbnail(target_size, Image.ANTIALIAS)
+    return img_hr
+
+#cell#
+
+def get_median_color(slide):
+    return np.apply_over_axes(np.median, 
+                              np.asarray(slide.associated_images["thumbnail"]),
+                              [0,1]).ravel()
+
+#cell#
+
+def get_threshold_tissue_mask(img, color=False, filtersize=7,
+                   lower = [0, 0, 180],
+                   upper = [179, 20, 255]):
+    
+    kernel = (filtersize,filtersize)
+    if color:
+        # For HSV, Hue range is [0,179], Saturation range is [0,255] and Value range is [0,255]. 
+        imghsv = cv2.cvtColor(img,  cv2.COLOR_BGR2HSV)
+        imghsv[:,:,-1] = cv2.GaussianBlur(imghsv[:,:,-1],kernel,0)
+        imghsv[:,:,1] = cv2.GaussianBlur(imghsv[:,:,1],kernel,0)
+        lower = np.asarray(lower)
+        upper = np.asarray(upper)
+        mask = cv2.inRange(imghsv, lower, upper)
+    else:
+        imgavg = np.mean(img, axis=-1).astype('uint8')
+        blur = cv2.GaussianBlur(imgavg,kernel,0)
+        ret3, mask = cv2.threshold(blur,0,1,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
+    return (~mask.astype(bool)).astype('uint8')
+
+def convert_mask2contour(mask, minlen = 50):
+    im2, contours, hierarchy = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
+    
+    if minlen is not None:
+        contours = [np.squeeze(x) for x in contours if x.shape[0]>minlen]
+    return contours
+
+def get_chunk_countours(img, color=False, filtersize=7, minlen = 100):
+    mask = get_threshold_tissue_mask(img, color=color, filtersize=filtersize)
+    contours = convert_mask2contour(mask, minlen = minlen)
+    return contours
+
+#cell#
+
+def get_thumbnail_magnification(slide):
+    """get ratio of magnified / thumbnail dimension
+    assumes no isotropic scaling (indeed it is slightly anisotropic)"""
+    ratio = np.asarray(slide.dimensions) / np.asarray(slide.associated_images["thumbnail"].size)
+     # np.sqrt(np.prod(ratio))
+    return ratio
+
+def roi_loc(roi):
+    xmin, ymin = roi.min(0)
+    xmax, ymax = roi.max(0)
+    return np.r_[xmin, ymin], np.r_[xmax-xmin, ymax-ymin]
+
+#cell#
+
+def get_contour_centre(vertices):
+    mmnts = cv2.moments(np.asarray(vertices, dtype='int32'))
+    cX = int(mmnts["m10"] / mmnts["m00"])
+    cY = int(mmnts["m01"] / mmnts["m00"])
+    return (cX, cY)
+
+#cell#
+
+class CropRotateRoi():
+    def __init__(self, img, co, enlarge=1.00,
+                 use_offset=None, borderValue=None,
+                 rotation_matrix=None, angle=None):
+        
+        self.borderValue = np.asarray(borderValue,dtype=int) if borderValue is not None else None
+        co = np.asarray(co, dtype=int)
+        self.use_offset = use_offset
+        if use_offset:
+            self.offset = co.min(0)
+            co -= self.offset
+            print("offset", self.offset)
+        # calculate the affine transformation
+        self.enlarge = enlarge
+        if isinstance(img, np.ndarray):
+            height = img.shape[0]
+            width = img.shape[1]
+        elif isinstance(img, Image.Image):
+            width = img.size[0]
+            height = img.size[1]
+        elif isinstance(img, (tuple,list)):
+            width, height = img
+        #self.img_size = (int(self.enlarge*height),int(self.enlarge*width), )
+        if rotation_matrix is None:
+            self.rotation_matrix, self.angle = CropRotateRoi.get_rotation_matrix(co, angle=angle)
+        else:
+            self.rotation_matrix = rotation_matrix
+            self.angle = angle
+            
+        self.affine_matrix = self.rotation_matrix
+        # calculate offset
+        offset = (self.apply_roi(co, use_offset=False).min(0))#.astype(int)
+ 
+        matrix_full = CropRotateRoi._pad_affine_matrix_(self.rotation_matrix)
+        # Combine matrices
+        self.transl_matrix = np.eye(3) - np.pad(offset[:,np.newaxis], ((0,1),(2,0)),
+                               mode='constant', constant_values=0)
+        self.affine_matrix = self.transl_matrix.dot(matrix_full)[:2,:]
+        self.img_size = tuple(1+(self.apply_roi(co, use_offset=False).max(0)).astype(int)) #[::-1]
+
+    @classmethod
+    def get_rotation_matrix(self, co, angle=None):
+        xdim, ydim, angle_ = cv2.minAreaRect(co)
+        if angle is not None:
+            angle_ = angle
+        
+        box = cv2.boxPoints((xdim, ydim, angle_)) 
+        xmin, ymin = box.min(0)
+        xmax, ymax = box.max(0)
+        rect_center = ((xmin+xmax)/2,(ymin+ymax)/2)
+
+        return cv2.getRotationMatrix2D(rect_center, angle_, 1.0), angle_
+    @classmethod
+    def _pad_affine_matrix_(cls, matrix):
+        matrix_full = np.pad(matrix, ((0,1),(0,0)),
+                       mode='constant', constant_values=0)
+        matrix_full[-1,-1] = 1
+        return matrix_full
+    @classmethod
+    def _pad_vectors_(cls, vectors):
+        return np.pad(vectors, 
+                      ((0,0), (0,1)),
+                      mode='constant',
+                      constant_values=1)
+        
+    def apply_img(self, img, crop=True, borderValue=None):
+        if borderValue:
+            self.borderValue = np.asarray(borderValue,dtype=np.uint8)#.tolist()
+
+        borderMode = cv2.BORDER_TRANSPARENT
+#         img_size = tuple(self.maxbound[::-1])
+        print("img_size", self.img_size)
+        "perform the affine transformation on an image"
+        rotated = cv2.warpAffine(np.asarray(img), self.affine_matrix, 
+                                 self.img_size, cv2.INTER_CUBIC,
+                                 borderMode = borderMode,
+                                )
+        "fill the void"
+        transparentmask = rotated[:,:,3] == 0
+        transparentmask = np.broadcast_to(transparentmask[:,:,np.newaxis], rotated.shape)
+        rotated = rotated+ self.borderValue.astype('uint8').reshape(1,1,-1) * transparentmask.astype('uint8')
+        return rotated
+    
+    def apply_roi(self, co, use_offset=True):
+        "perform the affine transformation on a contour"
+        if use_offset and self.use_offset and hasattr(self, 'offset'):
+            co -= self.offset
+#         pad_contour = np.pad(co, 
+#                               ((0,0), (0,1)),
+#                               mode='constant',
+#                               constant_values=1)
+        
+        pad_contour = CropRotateRoi._pad_vectors_(co)
+        crop_contour = pad_contour.dot(self.affine_matrix.T)
+        return crop_contour
+    
+    def __call__(self, *args, crop=True):
+        out = []
+        for aa in args:
+            if isinstance(aa, Image.Image):
+                out.append(self.apply_img(aa, crop=crop))
+            elif len(aa.shape)==2 and aa.shape[1]==2:
+                out.append(self.apply_roi(aa))
+            elif len(aa.shape)>=2 and aa.shape[1]>2:
+                out.append(self.apply_img(aa, crop=crop))
+            else:
+                out.append(None)
+        if len(args)==1:
+            return out[0]
+        else:
+            return out
+
+#cell#
+
+def plot_roi(roi, **kwargs):
+    roi = np.asarray(roi)
+    return plt.plot(roi[:,0], roi[:,1], **kwargs)
+
+#cell#
+
+def within_roi(vertices, roi_start, roi_size,):
+    left = (vertices>=roi_start).all(0).all()
+    right = (vertices<=(roi_start + roi_size)).all(0).all()
+    return left and right
+
+def transform_roi_to_rotated_chunk(transform_mag, rr, roi_start, roi_size,):
+    vertices = np.asarray(rr["vertices"])
+#     print(rr["name"], left, right)
+    if within_roi(vertices, roi_start, roi_size,):
+#         print(rr["name"])
+        roi = rr.copy()
+        centroid = get_contour_centre(roi["vertices"])
+        roi["centroid_within_slice"] = within_roi(centroid, roi_start, roi_size,)
+        roi["vertices"] = transform_mag.apply_roi(np.asarray(rr["vertices"]), use_offset=True)
+        if (roi["vertices"]<0).all(0).any():
+            return None
+        else:
+            return roi
+
+#cell#
+
+def get_rotated_highres_roi(slide, chunkroi_small, 
+                    feature_rois = [],
+                    color=True, filtersize=35, minlen=500,
+                    median_color=None, angle=None
+                    ):
+    if median_color is None:
+        median_color = get_median_color(slide)
+    ratio = get_thumbnail_magnification(slide)
+    
+    roi_start, roi_size = roi_loc(chunkroi_small)
+    region = slide.read_region((roi_start*ratio).astype(int), 0, 
+                               (roi_size*ratio).astype(int))
+
+    transform_mag = CropRotateRoi(region, chunkroi_small*ratio,
+                          angle=angle,
+                          use_offset=True,
+                          borderValue=median_color)
+    region_ = transform_mag(region,
+#                             crop=True
+                           )
+    # transform feature rois:
+    rois_within_chunk = []
+    for rr in feature_rois:
+        rr = transform_roi_to_rotated_chunk(transform_mag, rr, 
+                                            roi_start*ratio, roi_size*ratio,)
+        if rr is not None:
+            print(rr["name"])
+            rois_within_chunk.append(rr)
+    # re-estimate the contour
+    mask_ = get_threshold_tissue_mask(region_, color=color, filtersize=filtersize)
+    chunkroi_large_refined = convert_mask2contour(mask_, minlen = minlen)
+    assert len(chunkroi_large_refined)>0
+    # take the longest contour
+    maxidx = np.argmax([len(x) for x in chunkroi_large_refined])
+    chunkroi_large_refined = chunkroi_large_refined[maxidx]
+    return transform_roi_to_rotated_chunk, region_, chunkroi_large_refined, rois_within_chunk
+
+#cell#
+
+def get_roi_dict(contour, name='tissue', id=0, sq_micron_per_pixel=None):
+    """input: 
+        contour: numpy array
+    """
+    cdict = {'id':id, 
+            'name': name,
+            'vertices':contour.tolist(),
+           'area': cv2.contourArea(np.asarray(roi["vertices"], dtype='int32'))
+           }
+    if sq_micron_per_pixel:
+        cdict['areamicrons'] = micron_per_pixel*cdict['area']
+    return cdict
+
+# ## Read XML ROI, convert, and save as JSON
+
+#cell#
+if __name__ == '__main__':
+
+    fnxml = "examples/6371/6371 1.xml"
+    fnjson = re.sub(".xml$", ".json", fnxml)
+# fnyaml = re.sub(".xml$", ".yaml", fnxml)
+    with open(fnxml) as fh:
+        soup = BeautifulSoup(fh, 'lxml')
+
+#cell#
+
+    regions = soup.find_all("region")
+
+#cell#
+
+    roilist = []
+    for rr in regions:
+#     name = rr.get("text").lower().rstrip('.')
+        attrs_ = rr.attrs.copy()
+        if ("text" in attrs_) and not ("name" in attrs_):
+            attrs_["name"] = attrs_.pop("text").lower().rstrip('.')
+        for kk,vv in attrs_.items():
+            if isinstance(vv,str) and vv.isdigit():
+                attrs_[kk] = int(vv)
+            else:
+                try:
+                    attrs_[kk] = float(vv)
+                except:
+                    if attrs_[kk]=='':
+                        attrs_[kk]=None
+                    continue
+        attrs_["vertices"] = get_vertices(rr)
+#     dict(name=name, vertices = get_vertices(rr))
+        roilist.append(attrs_)
+
+# for an ellipse, 
+# 
+#    area = $\pi \times r \times R$
+
+#cell#
+
+# ellipses = [{"vertices":get_vertices(rr), "length": rr["length"], "area":rr["area"]} for rr in regions if rr["type"]=='2']
+# ell = ellipses[1]
+
+#cell#
+
+    pd.Series([rr["name"] for rr in roilist]).value_counts()
+
+#cell#
+
+    with open(fnjson, 'w+') as fh:
+        json.dump(roilist, fh)
+
+#cell#
+
+# with open(fnyaml, 'w+') as fh:
+#     pyaml.dump(roilist, fh)
+
+#cell#
+
+    from matplotlib import pyplot as plt
+    get_ipython().magic('matplotlib inline')
+
+#cell#
+
+    for rr in roilist:
+        plt.plot([x for x,_ in  rr["vertices"]],
+                 [y for _,y in  rr["vertices"]])
+
+# ## Extract tissue chunk ROIs
+
+#cell#
+
+    fnsvs = "examples/6371/6371 1.svs"
+
+#cell#
+
+    slide = openslide.OpenSlide(fnsvs)
+    img = np.asarray(slide.associated_images["thumbnail"])
+
+    median_color = get_median_color(slide)
+
+#cell#
+
+    slide.associated_images.keys()
+
+#cell#
+
+    for kk,vv in slide.associated_images.items():
+        print(kk, vv.size)
+
+#cell#
+
+    plt.imshow(img)
+
+# ## Extract mask and contours
+
+#cell#
+
+    mask = get_threshold_tissue_mask(img, color=False, filtersize=7)
+    contours = convert_mask2contour(mask, minlen = 100)
+
+#cell#
+
+    sq_micron_per_pixel = np.median([roi["areamicrons"] / roi["area"] for roi in roilist])
+
+    tissue_roilist = [get_roi_dict(cc, name='tissue', id=nn+len(roilist), sq_micron_per_pixel=sq_micron_per_pixel) 
+                          for nn,cc in enumerate(contours)]
+
+#cell#
+
+# Save both contour lists together
+
+    with open(fnjson, 'w+') as fh:
+        json.dump(roilist + tissue_roilist, fh)
+
+# ### At this point we are done with basic contour extraction. It is enough for the first script 
+
+#cell#
+
+    plt.imshow(mask)
+
+#cell#
+
+#cell#
+
+    plt.figure(figsize = (18,10))
+    plt.imshow(img)
+    for co in contours:
+        plt.plot(co[:,0], co[:,1])
+
+#cell#
+
+    img.shape
+
+#cell#
+
+# objmask = np.zeros((height,width), np.uint8)
+# objmask = cv2.fillPoly(objmask, [co], 1)
+
+#cell#
+
+    height, width, _ = img.shape
+# objmask = convert_contour2mask(co, width, height, fill=1, shape='polygon', radius=3)
+
+# # Rotating, slicing, and stitching the picture
+# ## crop and rotate the image and a chunk roi
+
+#cell#
+
+# select a chunk roi
+    co = contours[2]
+
+# create a transformer object
+    cr = CropRotateRoi(img, co, enlarge=1.0, borderValue=median_color)
+
+# apply transformation
+    crimg = cr(img, crop=False)
+    crroi = cr(co)
+
+#cell#
+
+    plt.figure(figsize=(15,2))
+    plt.imshow(crimg)
+    plt.plot(crroi[:,0], crroi[:,1])
+
+# ## Now we can slice the chunk horizonatally, and stack highres images from the slices
+# 
+# As chunks are often huge and oriented diagonally, reading the bounding box around the ROI naively would involve reading huge number of pixels most of which are blank anyways. It is often not feasible in terms of RAM.
+
+#cell#
+
+    np.ceil(crimg.shape[1]/int(np.ceil(crimg.shape[1]/100)))
+
+#cell#
+
+    stepsize = 100
+    nsteps = int(np.ceil(crimg.shape[1]/stepsize))
+    print(nsteps)
+
+#cell#
+
+    def get_img_bbox(img):
+        h = img.shape[0]
+        w = img.shape[1]
+        return np.c_[[0,0],[w,0],[w,h], [0,h]].T
+
+# ## Read and stitch slices
+
+#cell#
+
+    ratio = get_thumbnail_magnification(slide)
+# for step in range(nsteps):
+    regions = []
+    reg_rois = []
+
+    chunk_height_tr_large = int(np.ceil(crimg.shape[0]*max(ratio)))
+
+    tight = False
+
+    for step in range(2):
+        currentchunk = crimg[:,stepsize*step:stepsize*(step+1)+1,:]
+        if tight:
+            "contour per se"
+            chunk_contours = get_chunk_countours(currentchunk, 
+                                       minlen = 10 , color=False,
+                                       filtersize=7)
+        else:
+            "compatible for stitching with other slices"
+            chunk_contours = [get_img_bbox(currentchunk)]
+        slice_offset = np.r_[stepsize*step,0]
+        chunk_contours += slice_offset
+        assert len(chunk_contours) ==1
+        chunk_contour = chunk_contours[0]
+        
+        chunk_slice_contour_origcoord = np.linalg.solve(CropRotateRoi._pad_affine_matrix_(cr.affine_matrix), 
+                                                        CropRotateRoi._pad_vectors_(chunk_contour).T).T[:,:2]
+        
+        trnsf, region, chunk_slice_roi_origcoord_mag, rois_within_chunk = get_rotated_highres_roi(slide,
+                                                                                       chunk_slice_contour_origcoord,
+                                                                                       roilist,
+                                                                                       angle=cr.angle
+                                                                                       )
+        regions.append(region)
+        for roi_ in rois_within_chunk:
+            roi = roi_.copy()
+            roi["vertices"] += slice_offset*ratio
+            reg_rois.append(roi)
+
+#cell#
+
+    [rr.shape for rr in regions]
+
+#cell#
+
+    fig, axs = plt.subplots(1,len(regions), figsize=(16, 8))
+    for ax,reg in zip(axs, regions):
+        ax.imshow(reg)
+
+#cell#
+
+    fig, axs = plt.subplots(1, figsize=(16, 8))
+
+    plt.imshow(np.hstack(*[regions],))
+#.shape
+    for roi in reg_rois:
+        plot_roi(roi["vertices"] )
+
+#cell#
+
+    plt.imshow(img)
+    plot_roi( chunk_slice_contour_origcoord )
+
+#cell#
+
+    plt.imshow(currentchunk)
+    plot_roi(chunk_contour-slice_offset)
+
+#cell#
+
+# plot_roi(chunk_slice_contour_origcoord)
+
+#cell#
+
+    plt.imshow(region)
+    plot_roi(chunk_slice_roi_origcoord_mag)
+    for rr in rois_within_chunk:
+        plot_roi(rr["vertices"])
+
+#cell#