import argparse
import os
from os.path import join
from pathflowai.utils import run_preprocessing_pipeline, generate_patch_pipeline, img2npy_, create_zero_mask
import click
import dask
import time
# Shared click context: accept -h as well as --help, widen help output.
CONTEXT_SETTINGS = {'help_option_names': ['-h', '--help'], 'max_content_width': 90}


@click.group(context_settings=CONTEXT_SETTINGS)
@click.version_option(version='0.1')
def preprocessing():
    # Container group for the preprocessing subcommands; no group-level logic.
    pass
def output_if_exists(filename):
    """Return the file name if the file exists, otherwise ``None``.

    Parameters
    ----------
    filename : str
        File in question.

    Returns
    -------
    str
        Filename.
    """
    return filename if os.path.exists(filename) else None
@preprocessing.command()
@click.option('-npy', '--img2npy', is_flag=True, help='Image to numpy for faster read.', show_default=True)
@click.option('-b', '--basename', default='A01', help='Basename of patches.', type=click.Path(exists=False), show_default=True)
@click.option('-i', '--input_dir', default='./inputs/', help='Input directory for patches.', type=click.Path(exists=False), show_default=True)
@click.option('-a', '--annotations', default=[], multiple=True, help='Annotations in image in order.', type=click.Path(exists=False), show_default=True)
@click.option('-pr', '--preprocess', is_flag=True, help='Run preprocessing pipeline.', show_default=True)
@click.option('-pa', '--patches', is_flag=True, help='Add patches to SQL.', show_default=True)
@click.option('-t', '--threshold', default=0.05, help='Threshold to remove non-purple slides.', show_default=True)
@click.option('-ps', '--patch_size', default=224, help='Patch size.', show_default=True)
@click.option('-it', '--intensity_threshold', default=100., help='Intensity threshold to rate a pixel as non-white.', show_default=True)
@click.option('-g', '--generate_finetune_segmentation', is_flag=True, help='Generate patches for one segmentation mask class for targeted finetuning.', show_default=True)
@click.option('-tc', '--target_segmentation_class', default=0, help='Segmentation Class to finetune on, output patches to another db.', show_default=True)
@click.option('-tt', '--target_threshold', default=0., help='Threshold to include target for segmentation if saving one class.', show_default=True)
@click.option('-odb', '--out_db', default='./patch_info.db', help='Output patch database.', type=click.Path(exists=False), show_default=True)
@click.option('-am', '--adjust_mask', is_flag=True, help='Remove additional background regions from annotation mask.', show_default=True)
@click.option('-nn', '--n_neighbors', default=5, help='If adjusting mask, number of neighbors connectivity to remove.', show_default=True)
@click.option('-bp', '--basic_preprocess', is_flag=True, help='Basic preprocessing pipeline, annotation areas are not saved. Used for benchmarking tool against comparable pipelines', show_default=True)
@click.option('-ei', '--entire_image', is_flag=True, help='Store entire image in central db rather than patches.', show_default=True)
@click.option('-nz', '--no_zarr', is_flag=True, help='Don\'t save zarr format file.', show_default=True)
@click.option('-pka', '--pkl_annot', is_flag=True, help='Look for .annot.pkl pickle files instead of xml annotations.', show_default=True)
@click.option('-ta', '--transpose_annotations', is_flag=True, help='Transpose annotations.', show_default=True)
@click.option('-gtm', '--get_tissue_mask', is_flag=True, help='Build tissue mask instead of intensity thresholding.', show_default=True)
@click.option('-ot', '--otsu', is_flag=True, help='Utilize otsu method to decide intensity threshold.', show_default=True)
@click.option('-cm', '--compression', default=8., help='If find tissue mask, how much to downsample image.', show_default=True)
@click.option('-ch', '--return_convex_hull', is_flag=True, help='Return convex hull of tissue mask.', show_default=True)
@click.option('-kh', '--keep_holes', is_flag=True, help='Keep holes tissue mask.', show_default=True)
@click.option('-mhs', '--max_hole_size', default=0, help='If removing holes, what is maximum allowed size to remain.', show_default=True)
@click.option('-gbc', '--gray_before_close', is_flag=True, help='Filter grays before binary closing operation.', show_default=True)
@click.option('-kl', '--kernel', default=61, help='Binary closing kernel.', show_default=True)
@click.option('-mos', '--min_object_size', default=100000, help='Remove all connected components smaller than this size.', show_default=True)
@click.option('-bs', '--blur_size', default=0, help='How much to blur tissue mask.', show_default=True)
def preprocess_pipeline(img2npy, basename, input_dir, annotations, preprocess, patches, threshold, patch_size, intensity_threshold, generate_finetune_segmentation, target_segmentation_class, target_threshold, out_db, adjust_mask, n_neighbors, basic_preprocess, entire_image, no_zarr, pkl_annot, transpose_annotations, get_tissue_mask, otsu, compression, return_convex_hull, keep_holes, max_hole_size, gray_before_close, kernel, min_object_size, blur_size):
    """Preprocessing pipeline that accomplishes 3 things. 1: storage into ZARR format, 2: optional mask adjustment, 3: storage of patch-level information into SQL DB"""
    # Locate the slide image: first existing file matching basename + a known
    # extension wins (order of extensions determines preference).
    svs_file = None
    for ext in ['.npy', '.svs', '.tiff', '.tif', '.vms', '.vmu', '.ndpi', '.scn', '.mrxs', '.svslide', '.bif', '.jpeg', '.png', '.h5']:
        svs_file = output_if_exists(join(input_dir, '{}{}'.format(basename, ext)))
        if svs_file is not None:
            break
    # BUGFIX: previously, when no file matched any extension, svs_file stayed
    # None and svs_file.endswith('.npy') crashed with AttributeError; fail
    # with an explicit, user-readable error instead.
    if svs_file is None:
        raise click.ClickException('No image found for basename "{}" in {}'.format(basename, input_dir))
    if img2npy and not svs_file.endswith('.npy'):
        # Convert the slide to .npy for faster subsequent reads.
        svs_file = img2npy_(input_dir, basename, svs_file)
    # Optional annotation sources: XML (or pickled) polygons and/or a
    # precomputed numpy segmentation mask; either may be absent (None).
    xml_file = output_if_exists(join(input_dir, '{}{}'.format(basename, ".xml" if not pkl_annot else ".annot.pkl")))
    npy_mask = output_if_exists(join(input_dir, '{}_mask.npy'.format(basename)))
    out_zarr = join(input_dir, '{}.zarr'.format(basename))
    out_pkl = join(input_dir, '{}_mask.pkl'.format(basename))
    adj_npy = ''  # path of adjusted mask; empty string means "no adjusted mask"
    start = time.time()
    if preprocess:
        # Stage 1: dump image (and annotations, if any) to zarr + pickle.
        run_preprocessing_pipeline(svs_file=svs_file,
                                   xml_file=xml_file,
                                   npy_mask=npy_mask,
                                   annotations=annotations,
                                   out_zarr=out_zarr,
                                   out_pkl=out_pkl,
                                   no_zarr=no_zarr,
                                   transpose_annotations=transpose_annotations)
        # No annotations supplied at all: fabricate an all-zero mask so the
        # downstream segmentation patch extraction still has a target.
        # NOTE(review): reconstructed nesting — assumed this runs only when
        # --preprocess is set; confirm against upstream history.
        if npy_mask is None and xml_file is None:
            print('Generating Zero Mask')
            npy_mask = join(input_dir, '{}_mask.npz'.format(basename))
            target_segmentation_class = 1
            generate_finetune_segmentation = True
            create_zero_mask(npy_mask, out_zarr if not no_zarr else svs_file, out_pkl)
    preprocess_point = time.time()
    print('Data dump took {}'.format(preprocess_point - start))
    if adjust_mask:
        # Stage 2 (optional): strip spurious background components from the
        # annotation mask; result cached under input_dir/adjusted_masks.
        from pathflowai.utils import adjust_mask
        adj_dir = join(input_dir, 'adjusted_masks')
        adj_npy = join(adj_dir, os.path.basename(npy_mask))
        os.makedirs(adj_dir, exist_ok=True)
        if not os.path.exists(adj_npy):
            adjust_mask(npy_mask, out_zarr if not no_zarr else svs_file, adj_npy, n_neighbors)
    adjust_point = time.time()
    print('Adjust took {}'.format(adjust_point - preprocess_point))
    if patches:  # ADD EXPORT TO SQL, TABLE NAME IS PATCH SIZE
        # Stage 3: write per-patch annotation areas into the SQL database.
        generate_patch_pipeline(basename,
                                input_dir=input_dir,
                                annotations=annotations,
                                threshold=threshold,
                                patch_size=patch_size,
                                out_db=out_db,
                                generate_finetune_segmentation=generate_finetune_segmentation,
                                target_class=target_segmentation_class,
                                intensity_threshold=intensity_threshold,
                                target_threshold=target_threshold,
                                adj_mask=adj_npy,
                                basic_preprocess=basic_preprocess,
                                entire_image=entire_image,
                                svs_file=svs_file,
                                transpose_annotations=transpose_annotations,
                                get_tissue_mask=get_tissue_mask,
                                otsu=otsu,
                                compression=compression,
                                return_convex_hull=return_convex_hull,
                                keep_holes=keep_holes,
                                max_hole_size=max_hole_size,
                                gray_before_close=gray_before_close,
                                kernel=kernel,
                                min_object_size=min_object_size,
                                blur_size=blur_size)
    patch_point = time.time()
    print('Patches took {}'.format(patch_point - adjust_point))
@preprocessing.command()
@click.option('-i', '--mask_dir', default='./inputs/', help='Input directory for masks.', type=click.Path(exists=False), show_default=True)
@click.option('-o', '--output_dir', default='./outputs/', help='Output directory for new masks.', type=click.Path(exists=False), show_default=True)
@click.option('-fr', '--from_annotations', default=[], multiple=True, help='Annotations to switch from.', show_default=True)
@click.option('-to', '--to_annotations', default=[], multiple=True, help='Annotations to switch to.', show_default=True)
def alter_masks(mask_dir, output_dir, from_annotations, to_annotations):
    """Map list of values to other values in mask."""
    import glob
    from pathflowai.utils import npy2da
    import numpy as np
    from dask.distributed import Client
    # from/to must pair up one-to-one.
    assert len(from_annotations) == len(to_annotations)
    from_annotations = list(map(int, from_annotations))
    to_annotations = list(map(int, to_annotations))
    from_to = list(zip(from_annotations, to_annotations))
    os.makedirs(output_dir, exist_ok=True)
    # Local dask cluster to back the lazy npy2da arrays.
    c = Client()
    try:
        for mask in glob.glob(join(mask_dir, '*_mask.npy')):
            output_mask = join(output_dir, os.path.basename(mask))
            arr = npy2da(mask)
            # Remap each class label in place on the lazy array.
            for fr, to in from_to:
                arr[arr == fr] = to
            np.save(output_mask, arr.compute())
    finally:
        # BUGFIX: the client was previously never shut down, leaking the
        # scheduler/worker processes after the command finished.
        c.close()
@preprocessing.command()
@click.option('-i', '--input_patch_db', default='patch_info_input.db', help='Input db.', type=click.Path(exists=False), show_default=True)
@click.option('-o', '--output_patch_db', default='patch_info_output.db', help='Output db.', type=click.Path(exists=False), show_default=True)
@click.option('-b', '--basename', default='A01', help='Basename.', type=click.Path(exists=False), show_default=True)
@click.option('-ps', '--patch_size', default=224, help='Patch size.', show_default=True)
def remove_basename_from_db(input_patch_db, output_patch_db, basename, patch_size):
    """Removes basename/ID from SQL DB."""
    import sqlite3
    import pandas as pd
    # BUGFIX: output_patch_db[:output_patch_db.rfind('/')] truncated the last
    # character of a bare filename when the path had no '/' (rfind == -1),
    # creating a bogus directory; use dirname and skip when empty.
    out_dir = os.path.dirname(output_patch_db)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    # Table name is the patch size; pull the whole patch table into memory.
    conn = sqlite3.connect(input_patch_db)
    df = pd.read_sql('select * from "{}";'.format(patch_size), con=conn)
    conn.close()
    # Drop every patch belonging to the given slide basename/ID.
    df = df.loc[df['ID'] != basename]
    conn = sqlite3.connect(output_patch_db)
    df.set_index('index').to_sql(str(patch_size), con=conn, if_exists='replace')
    conn.close()
@preprocessing.command()
@click.option('-i', '--input_patch_db', default='patch_info_input.db', help='Input db.', type=click.Path(exists=False), show_default=True)
@click.option('-o', '--output_patch_db', default='patch_info_output.db', help='Output db.', type=click.Path(exists=False), show_default=True)
@click.option('-fr', '--from_annotations', default=[], multiple=True, help='Annotations to switch from.', show_default=True)
@click.option('-to', '--to_annotations', default=[], multiple=True, help='Annotations to switch to.', show_default=True)
@click.option('-ps', '--patch_size', default=224, help='Patch size.', show_default=True)
@click.option('-rb', '--remove_background_annotation', default='', help='If selected, removes 100\% background patches based on this annotation.', type=click.Path(exists=False), show_default=True)
@click.option('-ma', '--max_background_area', default=0.05, help='Max background area before exclusion.', show_default=True)
def collapse_annotations(input_patch_db, output_patch_db, from_annotations, to_annotations, patch_size, remove_background_annotation, max_background_area):
    """Adds annotation classes areas to other annotation classes in SQL DB when getting rid of some annotation classes."""
    import sqlite3
    import numpy as np, pandas as pd
    # from/to must pair up one-to-one.
    assert len(from_annotations) == len(to_annotations)
    from_annotations = list(map(str, from_annotations))
    to_annotations = list(map(str, to_annotations))
    # BUGFIX: output_patch_db[:output_patch_db.rfind('/')] mangled bare
    # filenames (rfind == -1 slices off the last character); use dirname.
    out_dir = os.path.dirname(output_patch_db)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    conn = sqlite3.connect(input_patch_db)
    df = pd.read_sql('select * from "{}";'.format(patch_size), con=conn)
    conn.close()
    from_to = zip(from_annotations, to_annotations)
    if remove_background_annotation:
        # Keep only patches whose background area is within the allowed limit.
        df = df.loc[df[remove_background_annotation] <= (1. - max_background_area)]
    # Fold each source class's area into its target class, then drop sources.
    for fr, to in from_to:
        df.loc[:, to] += df[fr]
    df = df[[col for col in list(df) if col not in from_annotations]]
    # Annotation-area columns start at position 6; renumber them 0..k-1.
    annotations = list(df.iloc[:, 6:])
    df = df.rename(columns={annot: str(i) for i, annot in enumerate(annotations)})
    annotations = list(df.iloc[:, 6:])
    # Re-label each patch with its dominant (largest-area) annotation class.
    df.loc[:, 'annotation'] = np.vectorize(lambda i: annotations[df.iloc[i, 6:].values.argmax()])(np.arange(df.shape[0]))
    df.loc[:, 'index'] = np.arange(df.shape[0])
    conn = sqlite3.connect(output_patch_db)
    df.set_index('index').to_sql(str(patch_size), con=conn, if_exists='replace')
    conn.close()
if __name__ == '__main__':
    from dask.distributed import Client
    # Route dask scratch data to tmp/ and tolerate more worker failures
    # before the scheduler gives up on a task.
    dask.config.set({'temporary_dir': 'tmp/',
                     'distributed.worker.local_dir': 'tmp/',
                     'distributed.scheduler.allowed-failures': 20})
    client = Client(processes=False)
    preprocessing()
    client.close()