|
a |
|
b/slideslicer/sample_from_slide.py |
|
|
1 |
# coding: utf-8 |
|
|
2 |
|
|
|
3 |
from PIL import Image |
|
|
4 |
import numpy as np |
|
|
5 |
from collections import Counter |
|
|
6 |
import pandas as pd |
|
|
7 |
import os |
|
|
8 |
import re |
|
|
9 |
import json |
|
|
10 |
from warnings import warn |
|
|
11 |
|
|
|
12 |
import openslide |
|
|
13 |
import cv2 |
|
|
14 |
from pycocotools.mask import encode, decode |
|
|
15 |
|
|
|
16 |
from slideslicer.extract_rois_svs_xml import extract_rois_svs_xml |
|
|
17 |
from slideslicer.slideutils import (plot_contour, get_median_color, |
|
|
18 |
get_thumbnail_magnification, |
|
|
19 |
get_img_bbox, get_rotated_highres_roi, |
|
|
20 |
get_uniform_tiles, |
|
|
21 |
get_threshold_tissue_mask, |
|
|
22 |
convert_contour2mask, |
|
|
23 |
convert_mask2contour, |
|
|
24 |
CropRotateRoi, |
|
|
25 |
get_contour_centre, read_roi_patches_from_slide, |
|
|
26 |
clip_roi_wi_bbox, sample_points) |
|
|
27 |
|
|
|
28 |
|
|
|
29 |
def get_img_id(svsname):
    """Derive a filesystem-friendly image id from an ``.svs`` file path.

    Strips the directory part and the ``.svs`` extension, then replaces
    spaces and dashes with underscores so the id is safe to embed in
    output file names.

    Parameters
    ----------
    svsname : str
        Path to (or bare name of) a ``.svs`` slide file.

    Returns
    -------
    str
        Sanitized image identifier.
    """
    # raw string: "\." in a non-raw literal is an invalid escape
    # (DeprecationWarning, SyntaxWarning on 3.12+)
    imgid = re.sub(r"\.svs$", "",
                   os.path.basename(svsname)
                   ).replace(" ", "_").replace("-", "_")
    return imgid
|
|
34 |
|
|
|
35 |
def get_prefix(imgid, pos, name, tissueid, id, parentdir = "data", suffix=''):
    """Build the output-file prefix for a patch.

    The resulting path has the shape
    ``{parentdir}/{type}/{imgid}-x{X}-y{Y}-t{tissueid}-r{id}-{type}{suffix}``
    where ``type`` is ``name`` with spaces replaced by underscores.

    Parameters
    ----------
    imgid : str
        Slide identifier (see ``get_img_id``).
    pos : sequence
        ``(x, y)`` patch origin within the slide.
    name : str
        ROI type name; spaces become underscores in the path.
    tissueid :
        Identifier of the enclosing tissue chunk(s).
    id :
        ROI identifier (kept as-is; name mirrors the caller-facing keyword).
    parentdir : str
        Root directory for outputs.
    suffix : str
        Optional trailing tag (e.g. ``'-targeted'``).

    Returns
    -------
    str
        The path prefix (no extension).
    """
    roi_type = name.replace(" ", "_")
    return "{0}/{1}/{2}-x{3}-y{4}-t{5}-r{6}-{1}{7}".format(
        parentdir, roi_type, imgid, pos[0], pos[1], tissueid, id, suffix)
|
|
46 |
|
|
|
47 |
|
|
|
48 |
def summarize_rois_wi_patch(rois, bg_names = ("tissue",), frac_thr=16):
    """Summarize the ROIs that overlap a patch into a single name/id record.

    Background ROIs (those whose name is in ``bg_names``) are collected
    separately into a joint ``tissue_id``.  The remaining (feature) ROIs are
    aggregated by name; if one feature dominates the others by more than
    ``frac_thr`` in total area, the patch is labelled by that feature alone,
    otherwise by the '+'-joined set of feature names.

    Parameters
    ----------
    rois : list of dict
        Each with at least ``name``, ``area`` and ``id`` keys.
    bg_names : sequence of str
        ROI names treated as background tissue (default: ``("tissue",)``;
        a tuple — a mutable list default would be a shared-state hazard).
    frac_thr : float
        Area-ratio threshold between the largest and the second-largest
        feature above which the patch gets a single-feature label.

    Returns
    -------
    dict
        With keys ``name``, ``id``, ``tissue_id`` and ``stats`` (per-ROI
        area records, sorted by descending area).
    """
    names = []
    areas = []
    ids = []

    tissue_info = []
    for rr in rois:
        if rr['name'] in bg_names:
            tissue_info.append(rr)
            continue
        names.append(rr['name'])
        areas.append(rr['area'])
        ids.append(rr['id'])
    # assert (len(tissue_info)==1)
    tissue_id = "+".join(sorted(["%s"%tt['id'] for tt in tissue_info]))
    dfareas = (pd.DataFrame(dict(area=areas, name=names, id=ids))
                 .sort_values("area", ascending=False)
               )
    areasum = (dfareas.groupby('name')
                      .agg({"area": "sum", "id": "first"})
                      .sort_values("area", ascending=False)
               )
    if len(areasum) == 0:
        # no feature ROIs at all: a blank (tissue-only) patch
        return {'name': 'blank',
                "id": tissue_id,
                "tissue_id": tissue_id,
                "stats": dfareas.to_dict(orient='records')
                }
    elif len(areasum) == 1:
        name = areasum.index[0]
        # .iloc: positional int access via ``series[0]`` on a label index
        # is deprecated/removed in modern pandas
        roi_id = areasum["id"].iloc[0]
    elif areasum["area"].iloc[0] / areasum["area"].iloc[1] > frac_thr:
        # one feature dominates by area: label the patch with it alone
        name = areasum.index[0]
        roi_id = areasum["id"].iloc[0]
    else:
        # comparable areas: joint label of all feature names / ids
        name = '+'.join(areasum.index.tolist())
        roi_id = '+'.join(sorted(areasum["id"].astype(str).tolist()))
    return {"name": name,
            "id": str(roi_id),
            "tissue_id": tissue_id,
            "stats": dfareas.to_dict(orient='records')}
|
|
89 |
|
|
|
90 |
|
|
|
91 |
# Rewrite for generator if needed: |
|
|
92 |
def visualise_chunks_and_rois(img_arr, roi_cropped_list,
                              nrows = 5, figsize=(15,15)
                              ):
    """Plot a grid of image patches with their non-tissue ROI contours overlaid.

    Each subplot shows one patch; contours of every ROI except ``'tissue'``
    are drawn on top, and the x-label lists ``id: name`` for those ROIs.

    Parameters
    ----------
    img_arr : sequence of images
        Patches to display (anything ``imshow`` accepts).
    roi_cropped_list : sequence of list of dict
        Per-patch ROI dicts with ``name``, ``id`` and ``vertices`` keys.
    nrows : int
        Number of subplot rows; columns are ``len(img_arr) // nrows``.
    figsize : tuple
        Figure size passed to ``plt.subplots``.
    """
    # BUGFIX: ``plt`` is only imported inside the __main__ section (under
    # VISUALIZE), so referring to a global ``plt`` here raised NameError.
    # Import lazily so matplotlib is only required when actually plotting.
    from matplotlib import pyplot as plt
    fig, axs = plt.subplots(nrows, len(img_arr)//nrows, figsize=figsize)
    for ax, reg, rois in zip(axs.ravel(), img_arr, roi_cropped_list):
        ax.imshow(reg)
        for rr in rois:
            if rr['name'] == 'tissue':
                continue
            plot_contour(rr["vertices"], ax=ax)
        xlab = "\n".join(["{}: {}".format(rr['id'], rr['name']) \
                          for rr in rois if rr['name'] !='tissue'])
        ax.set_xlabel(xlab)
        ax.set_xticklabels([])
        ax.set_yticklabels([])
|
|
107 |
|
|
|
108 |
|
|
|
109 |
def get_tissue_rois(slide,
                    roilist,
                    vis = False,
                    step = 1024,
                    magnlevel = 0,
                    target_size = None,
                    maxarea = 1e7,
                    random=False,
                    normal_only=True,
                    shift_factor = 2,
                    ):
    """Yield, for every 'tissue' ROI, an iterator over patches sampled on it.

    Points are sampled over each tissue contour on a grid (or at random),
    and ``read_roi_patches_from_slide`` is invoked to cut patches around
    them; optionally only patches overlapping nothing but tissue are kept.

    Parameters
    ----------
    slide : openslide.OpenSlide
        The open slide to read from.
    roilist : list of dict
        All ROIs; entries named ``'tissue'`` drive the sampling, the full
        list is forwarded as ``but_list`` for overlap bookkeeping.
    vis : bool
        Unused here (visualisation left commented out).
    step : int
        Sampling spacing in pixels; also the default patch side.
    magnlevel : int
        Pyramid level forwarded to the patch reader.
    target_size : list of int or None
        Patch size; defaults to ``[step, step]``.
    maxarea : float
        Maximal ROI area forwarded to the patch reader.
    random : bool
        Sample points at random instead of on a grid.
    normal_only : bool
        Keep only patches whose overlapping ROIs are all plain tissue.
    shift_factor : int
        Grid shift is ``-step // shift_factor``.

    Yields
    ------
    iterator
        One patch iterator per tissue ROI.
    """
    print("NORMAL_ONLY", normal_only)
    if target_size is None:
        target_size = [step, step]

    for tissue_roi in [rr for rr in roilist if rr['name'] == 'tissue']:
        print("tissue roi, id", tissue_roi["id"])
        contour = tissue_roi["vertices"]
        sampled = sample_points(contour,
                                spacing = step,
                                shift = -step//shift_factor,
                                mode = 'random' if random else 'grid')

        print("roi {} #{}:\t{:d} points sampled".format(tissue_roi["name"], tissue_roi["id"], len(sampled), ))
        point_rois = [{"vertices": [pt], "area": 0} for pt in sampled]

        # img_arr, roi_cropped_list, msk_arr, = \
        patch_iter = read_roi_patches_from_slide(slide,
                                                 point_rois,
                                                 but_list = roilist,
                                                 target_size = target_size,
                                                 magnlevel = magnlevel,
                                                 maxarea = maxarea,
                                                 color=1,
                                                 nchannels=3,
                                                 allcomponents = True,
                                                 nomask=True,
                                                 )
        # if vis:
        #     plt.scatter(points[:,0], points[:,1],c='r')
        #     plot_contour(cont)
        if normal_only:
            # keep only patches whose overlapping ROIs are all plain tissue
            patch_iter = filter(
                lambda item: all(r['name'] == 'tissue' for r in item[1]),
                patch_iter)
        yield patch_iter
|
|
159 |
|
|
|
160 |
|
|
|
161 |
def save_tissue_chunks(imgroiiter, imgid, parentdir="data",
                       lower = [0, 0, 180],
                       upper = [179, 10, 255],
                       close=50,
                       open_=30,
                       filtersize = 20,
                       frac_thr=16,
                       ):
    """Persist each tissue patch as a PNG plus a JSON of its ROI masks.

    For every ``(patch, rois, _, start_xy)`` produced by ``imgroiiter``,
    summarizes the ROIs, derives an output prefix, writes the image, then
    attaches run-length masks via ``add_roi_bytes`` and dumps them to JSON.

    Parameters
    ----------
    imgroiiter : iterable
        Yields ``(patch, rois, _, start_xy)`` tuples.
    imgid : str
        Slide identifier used in output names.
    parentdir : str
        Output root directory.
    lower, upper : list of int
        Colour bounds forwarded to ``add_roi_bytes``.
    close, open_ : int
        Morphology settings forwarded to ``add_roi_bytes``.
    filtersize : int
        Mask filter size forwarded to ``add_roi_bytes``.
    frac_thr : float
        Dominance threshold forwarded to ``summarize_rois_wi_patch``.
    """
    for idx, (patch, rois, _, start_xy) in enumerate(imgroiiter):
        summary = summarize_rois_wi_patch(rois, bg_names = [], frac_thr=frac_thr)
        prefix = get_prefix(imgid, start_xy, summary["name"], summary["id"], idx,
                            parentdir=parentdir,)

        #fn_summary_json = prefix + "-summary.json"
        fn_json = prefix + ".json"
        fnoutpng = prefix + '.png'
        print(fnoutpng)

        os.makedirs(os.path.dirname(fn_json), exist_ok=True)
        #with open(fn_summary_json, 'w+') as fhj: json.dump(sumdict, fhj)
        pil_img = patch if isinstance(patch, Image.Image) else Image.fromarray(patch)
        pil_img.save(fnoutpng)

        rois = add_roi_bytes(rois, np.asarray(patch),
                             lower=lower, upper=upper,
                             open=open_, close=close,
                             filtersize=filtersize)
        with open(fn_json, 'w+') as fhj:
            json.dump(rois, fhj)
|
|
191 |
|
|
|
192 |
|
|
|
193 |
def add_roi_bytes(rois, reg,
                  lower = [0, 0, 180],
                  upper = [179, 25, 255],
                  filtersize=25,
                  close=True,
                  open=False,
                  minlen = -1):
    """Attach COCO run-length masks to every ROI and refine the tissue ROI.

    Feature ROIs get a mask rasterized from their vertices; the (single)
    ROI named ``'tissue'`` gets a mask re-derived from the patch image via
    colour thresholding, with the other ROIs' pixels subtracted, and its
    vertices replaced by the longest detected contour.  ROIs whose
    vertices end up empty are dropped from the returned list.

    Parameters
    ----------
    rois : list of dict
        ROI dicts (mutated copies: masks are written into the same dicts).
    reg : ndarray
        The patch image the ROIs live in.
    lower, upper : list of int
        Colour bounds for ``get_threshold_tissue_mask``
        (presumably HSV — TODO confirm against that helper).
    filtersize : int
        Morphology/filter size; also the default contour ``minlen``.
    close, open : bool or int
        Morphology options forwarded to ``get_threshold_tissue_mask``
        (``open`` shadows the builtin but is part of the public signature).
    minlen : int
        Minimal contour length; ``-1`` means "use ``filtersize``".

    Returns
    -------
    list of dict
        ROIs with ``counts``/``size`` mask fields added, empties removed.
    """
    if minlen==-1:
        minlen=filtersize
    rois = rois.copy()
    tissue_roi = None
    other_mask_ = 0

    print('ROIS:', *[roi_['name'] for roi_ in rois])
    for roi_ in rois:
        if roi_["name"] == "tissue":
            tissue_roi = roi_
            continue
        mask_ = convert_contour2mask(roi_["vertices"],
                                     reg.shape[1], reg.shape[0],
                                     fill=1, order='F')

        # pycocotools expects a Fortran-ordered uint8 array
        cocomask = encode(np.asarray(mask_, dtype='uint8'))
        cocomask["counts"] = cocomask["counts"].decode('utf-8')
        roi_.update(cocomask)
        if isinstance(roi_["vertices"], np.ndarray):
            roi_["vertices"] = roi_["vertices"].tolist()
        # accumulate the union of all feature masks
        other_mask_ = np.maximum(other_mask_, mask_)

    roi_ = tissue_roi
    if roi_ is None:
        warn("Someting strange is going on. Make sure no tissue chunks are missing")
        roi_ = {'vertices': []}
    #print('tissue roi', roi_)
    if reg is not None:
        mask_ = get_threshold_tissue_mask(reg, color=True,
                                          filtersize=filtersize,
                                          dtype=bool,
                                          open=open, close=close,
                                          lower = lower, upper = upper)
        if mask_.sum()==0:
            roi_["vertices"]= []
            print("skipping empty mask", roi_['name'], roi_['id'])
        verts = convert_mask2contour(mask_.astype('uint8'), minlen=minlen)
        if len(verts)>0:
            # BUGFIX: ``verts[np.argmax(map(len, verts))]`` wrapped the map
            # object in a 0-d object array, so argmax always returned 0 and
            # the *first* contour was picked regardless of length; pick the
            # longest contour explicitly.
            roi_["vertices"] = max(verts, key=len)
        mask_ = np.asarray(mask_, order='F')
    else:
        # NOTE(review): this branch runs only when ``reg is None`` yet it
        # dereferences ``reg.shape`` — it would raise AttributeError if ever
        # reached. Left unchanged pending clarification of the intent.
        mask_ = convert_contour2mask(roi_["vertices"], reg.shape[1], reg.shape[0],
                                     fill=1, order='F')
        if mask_.sum()==0:
            roi_["vertices"]= []
        #continue

    if isinstance(other_mask_, np.ndarray):
        # tissue mask = thresholded tissue minus all feature-ROI pixels
        mask_ = mask_.astype(bool) & ~other_mask_.astype(bool)
    cocomask = encode(np.asarray(mask_, dtype='uint8'))
    cocomask["counts"] = cocomask["counts"].decode('utf-8')
    roi_.update(cocomask)
    if isinstance(roi_["vertices"], np.ndarray):
        roi_["vertices"] = roi_["vertices"].tolist()
    rois = [rr for rr in rois if len(rr['vertices'])>0]
    return rois
|
|
261 |
|
|
|
262 |
|
|
|
263 |
if __name__ == '__main__':
    import sys
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--data-root',
        type=str,
        default='../data',
        help='The directory where the input data will be stored.')

    parser.add_argument(
        '--json-dir',
        type=str,
        default='../data/roi-json',
        help='The directory where the roi JSON files will be stored.')

    parser.add_argument(
        '--keep-empty',
        action='store_true',
        default=False,
        help='keep empty tissue chunks (with no annotations within)')

    parser.add_argument(
        '--target-side',
        type=int,
        default=1024,
        # FIX: help text was a copy-paste of --data-root's description
        help='side length (in pixels) of the square patches to extract.')

    parser.add_argument(
        '--max-area',
        type=float,
        default=1e7,
        help='maximal area of a roi')

    parser.add_argument(
        '--fnxml',
        dest='fnxml',
        type=str,
        help='The XML files for ROI.')

    parser.add_argument(
        '--all-grid',
        action='store_true',
        default=False,
        help='store all grid patches (by defaut grid patches that overlap features will be removed)')

    parser.add_argument(
        '--target-sampling',
        action='store_true',
        default=False,
        help='store only grid patches')

    parser.add_argument(
        '--keep-levels',
        type=int,
        default=3,
        help='.')

    parser.add_argument(
        '--magnlevel',
        type=int,
        default=0,
        help='.')

    parser.add_argument(
        '--frac-stride',
        type=int,
        default=1,
        help='.')

    prms = parser.parse_args()
    VISUALIZE = False

    # colour bounds and morphology settings for tissue masking
    # (presumably HSV bounds — confirm against get_threshold_tissue_mask)
    lower = [0, 0, 180]
    upper = [179, 10, 255]
    close=50
    open_=30
    filtersize = 20

    # the slide is assumed to sit next to its annotation XML
    # FIX: raw string + escaped dot — ".xml$" would also match e.g. "fooxml"
    fnsvs = re.sub(r"\.xml$", ".svs", prms.fnxml)

    outdir = os.path.join(prms.data_root, "data_{}/fullsplit".format(prms.target_side))

    ## setup
    imgid = get_img_id(fnsvs)

    target_size = [prms.target_side, prms.target_side,]
    #os.makedirs(outdir)

    # ## Read XML ROI, convert, and save as JSON
    # FIX: ``~prms.keep_empty`` was a *bitwise* not on a bool
    # (~False == -1, ~True == -2 — both truthy), so remove_empty was
    # always true regardless of the --keep-empty flag.
    fnjson = extract_rois_svs_xml(prms.fnxml, outdir=prms.json_dir,
                                  remove_empty = not prms.keep_empty,
                                  keeplevels=prms.keep_levels)

    with open(fnjson,'r') as fh:
        roilist = json.load(fh)

    print("ROI type counts")
    print(pd.Series([roi["name"] for roi in roilist]).value_counts())

    # read slide
    slide = openslide.OpenSlide(fnsvs)

    # load the thumbnail image
    img = np.asarray(slide.associated_images["thumbnail"])

    median_color = get_median_color(slide)
    ratio = get_thumbnail_magnification(slide)

    print("full scale slide dimensions: w={}, h={}".format(*slide.dimensions))

    if VISUALIZE:
        from matplotlib import pyplot as plt
        colordict = {'open glom': 'b',
                     'scler glom': 'm',
                     'infl':'r',
                     'tissue':'w',
                     'other tissue':'y',
                     'art':'olive',
                     'fold':'y'}

        #cell#

        plt.figure(figsize = (18,10))
        plt.imshow(img)
        for roi in roilist:
            plot_contour(roi["vertices"]/ratio, c=colordict[roi['name']])

        #cell#
        vert = roilist[19]["vertices"]
        target_size = [1024]*2
        x,y,w,h = cv2.boundingRect(np.asarray(vert).round().astype(int))
        # NOTE(review): ``get_region_mask`` is not defined or imported in
        # this module — this dead (VISUALIZE-only) branch would raise
        # NameError if enabled; confirm the intended helper in slideutils.
        mask, cropped_vertices = get_region_mask(vert, [x,y], (w,h), color=(255,))

        plt.imshow(mask)
        plot_contour(cropped_vertices, c='r')
        print(mask.max())

    #############################
    if prms.target_sampling:
        print("READING TARGETED ROIS", file=sys.stderr)

        imgroiiter = read_roi_patches_from_slide(slide, roilist,
                                                 target_size = target_size,
                                                 maxarea = prms.max_area,
                                                 nchannels=3,
                                                 allcomponents=True,
                                                 )

        print("READING AND SAVING SMALLER ROIS (GLOMERULI, INFLAMMATION LOCI ETC.)",
              file=sys.stderr)

        for reg, rois,_, start_xy in imgroiiter:
            sumdict = summarize_rois_wi_patch(rois, bg_names = ["tissue"], frac_thr=16)
            prefix = get_prefix(imgid, start_xy, sumdict["name"], sumdict["tissue_id"],
                                sumdict["id"], parentdir=outdir, suffix='-targeted')
            #fn_summary_json = prefix + "-summary.json"
            fn_json = prefix + ".json"
            fnoutpng = prefix + '.png'
            print(fnoutpng)
            os.makedirs(os.path.dirname(fn_json), exist_ok=True)

            #with open(fn_summary_json, 'w+') as fhj: json.dump(sumdict, fhj)
            if isinstance(reg, Image.Image):
                reg.save(fnoutpng)
            else:
                Image.fromarray(reg).save(fnoutpng)

            rois = add_roi_bytes(rois, reg, lower=lower, upper=upper,
                                 close=close,
                                 open=open_,
                                 filtersize = filtersize)
            with open(fn_json, 'w+') as fhj: json.dump( rois, fhj)

    print("READING AND SAVING _FEATURELESS_ / NORMAL TISSUE", file=sys.stderr)

    # each pyramid level is a 4x downsampling
    magnification = 4**prms.magnlevel
    real_side = prms.target_side * magnification

    for tissue_chunk_iter in get_tissue_rois(slide,
                                             roilist,
                                             vis = False,
                                             step = real_side // prms.frac_stride,
                                             target_size = [real_side]*2,
                                             maxarea = 1e7,
                                             random=False,
                                             normal_only = not prms.all_grid,
                                             ):
        # save
        print('saving tissue chunk')
        save_tissue_chunks(tissue_chunk_iter, imgid, parentdir=outdir,
                           close=close,
                           open_=open_,
                           frac_thr=16,
                           filtersize = filtersize)