# pathflowai/cli_preprocessing.py
import argparse
import os
from os.path import join
from pathflowai.utils import run_preprocessing_pipeline, generate_patch_pipeline, img2npy_, create_zero_mask
import click
import dask
import time
# Shared click context: accept -h as a help alias and widen help-text output.
CONTEXT_SETTINGS = {'help_option_names': ['-h', '--help'], 'max_content_width': 90}
@click.group(context_settings= CONTEXT_SETTINGS)
@click.version_option(version='0.1')
def preprocessing():
    # Root command group for the preprocessing CLI; subcommands attach via
    # @preprocessing.command(). Deliberately no docstring: click would surface
    # it as the group's help text, and current help output is empty.
    pass
def output_if_exists(filename):
    """Return ``filename`` unchanged when it exists on disk, else ``None``.

    Parameters
    ----------
    filename : str
        Path to check.

    Returns
    -------
    str or None
        The same path when it exists, otherwise ``None``.
    """
    return filename if os.path.exists(filename) else None
@preprocessing.command()
@click.option('-npy', '--img2npy', is_flag=True, help='Image to numpy for faster read.', show_default=True)
@click.option('-b', '--basename', default='A01', help='Basename of patches.', type=click.Path(exists=False), show_default=True)
@click.option('-i', '--input_dir', default='./inputs/', help='Input directory for patches.', type=click.Path(exists=False), show_default=True)
@click.option('-a', '--annotations', default=[], multiple=True, help='Annotations in image in order.', type=click.Path(exists=False), show_default=True)
@click.option('-pr', '--preprocess', is_flag=True, help='Run preprocessing pipeline.', show_default=True)
@click.option('-pa', '--patches', is_flag=True, help='Add patches to SQL.', show_default=True)
@click.option('-t', '--threshold', default=0.05, help='Threshold to remove non-purple slides.',  show_default=True)
@click.option('-ps', '--patch_size', default=224, help='Patch size.',  show_default=True)
@click.option('-it', '--intensity_threshold', default=100., help='Intensity threshold to rate a pixel as non-white.',  show_default=True)
@click.option('-g', '--generate_finetune_segmentation', is_flag=True, help='Generate patches for one segmentation mask class for targeted finetuning.', show_default=True)
@click.option('-tc', '--target_segmentation_class', default=0, help='Segmentation Class to finetune on, output patches to another db.',  show_default=True)
@click.option('-tt', '--target_threshold', default=0., help='Threshold to include target for segmentation if saving one class.',  show_default=True)
@click.option('-odb', '--out_db', default='./patch_info.db', help='Output patch database.', type=click.Path(exists=False), show_default=True)
@click.option('-am', '--adjust_mask', is_flag=True, help='Remove additional background regions from annotation mask.', show_default=True)
@click.option('-nn', '--n_neighbors', default=5, help='If adjusting mask, number of neighbors connectivity to remove.',  show_default=True)
@click.option('-bp', '--basic_preprocess', is_flag=True, help='Basic preprocessing pipeline, annotation areas are not saved. Used for benchmarking tool against comparable pipelines', show_default=True)
@click.option('-ei', '--entire_image', is_flag=True, help='Store entire image in central db rather than patches.', show_default=True)
@click.option('-nz', '--no_zarr', is_flag=True, help='Don\'t save zarr format file.', show_default=True)
@click.option('-pka', '--pkl_annot', is_flag=True, help='Look for .annot.pkl pickle files instead of xml annotations.', show_default=True)
@click.option('-ta', '--transpose_annotations', is_flag=True, help='Transpose annotations.', show_default=True)
@click.option('-gtm', '--get_tissue_mask', is_flag=True, help='Build tissue mask instead of intensity thresholding.', show_default=True)
@click.option('-ot', '--otsu', is_flag=True, help='Utilize otsu method to decide intensity threshold.', show_default=True)
@click.option('-cm', '--compression', default=8., help='If find tissue mask, how much to downsample image.',  show_default=True)
@click.option('-ch', '--return_convex_hull', is_flag=True, help='Return convex hull of tissue mask.', show_default=True)
@click.option('-kh', '--keep_holes', is_flag=True, help='Keep holes tissue mask.', show_default=True)
@click.option('-mhs', '--max_hole_size', default=0, help='If removing holes, what is maximum allowed size to remain.',  show_default=True)
@click.option('-gbc', '--gray_before_close', is_flag=True, help='Filter grays before binary closing operation.', show_default=True)
@click.option('-kl', '--kernel', default=61, help='Binary closing kernel.',  show_default=True)
@click.option('-mos', '--min_object_size', default=100000, help='Remove all connected components smaller than this size.',  show_default=True)
@click.option('-bs', '--blur_size', default=0, help='How much to blur tissue mask.',  show_default=True)
def preprocess_pipeline(img2npy,basename,input_dir,annotations,preprocess,patches,threshold,patch_size, intensity_threshold, generate_finetune_segmentation, target_segmentation_class, target_threshold, out_db, adjust_mask, n_neighbors, basic_preprocess, entire_image, no_zarr, pkl_annot, transpose_annotations,get_tissue_mask,otsu,compression,return_convex_hull, keep_holes, max_hole_size, gray_before_close, kernel, min_object_size, blur_size):
    """Preprocessing pipeline that accomplishes 3 things. 1: storage into ZARR format, 2: optional mask adjustment, 3: storage of patch-level information into SQL DB"""

    # Locate the slide image for this basename: the first extension that
    # resolves to an existing file wins.
    svs_file = None
    for ext in ['.npy', '.svs', '.tiff', '.tif', '.vms', '.vmu', '.ndpi', '.scn', '.mrxs', '.svslide', '.bif', '.jpeg', '.png', '.h5']:
        svs_file = output_if_exists(join(input_dir, '{}{}'.format(basename, ext)))
        if svs_file is not None:
            break
    # BUGFIX: previously a missing image crashed later (None.endswith) or was
    # silently handed to the pipeline as None; fail fast with a clear message.
    if svs_file is None:
        raise FileNotFoundError('No image found for basename {} in {}'.format(basename, input_dir))

    # Optionally convert the image to .npy once for faster subsequent reads.
    if img2npy and not svs_file.endswith('.npy'):
        svs_file = img2npy_(input_dir, basename, svs_file)

    # Annotation sources: either an XML/pickle annotation file or a raw npy mask.
    xml_file = output_if_exists(join(input_dir, '{}{}'.format(basename, ".xml" if not pkl_annot else ".annot.pkl")))
    npy_mask = output_if_exists(join(input_dir, '{}_mask.npy'.format(basename)))
    out_zarr = join(input_dir, '{}.zarr'.format(basename))
    out_pkl = join(input_dir, '{}_mask.pkl'.format(basename))
    adj_npy = ''  # path of the adjusted mask, filled in only when -am is set

    start = time.time()
    # Stage 1: dump the image (and annotations) into zarr + pickle.
    if preprocess:
        run_preprocessing_pipeline(svs_file=svs_file,
                               xml_file=xml_file,
                               npy_mask=npy_mask,
                               annotations=annotations,
                               out_zarr=out_zarr,
                               out_pkl=out_pkl,
                               no_zarr=no_zarr,
                               transpose_annotations=transpose_annotations)

    # No annotations of any kind: create an all-zero mask so downstream patch
    # extraction still works, and force single-class finetune mode.
    if npy_mask is None and xml_file is None:
        print('Generating Zero Mask')
        npy_mask = join(input_dir, '{}_mask.npz'.format(basename))
        target_segmentation_class = 1
        generate_finetune_segmentation = True
        create_zero_mask(npy_mask, out_zarr if not no_zarr else svs_file, out_pkl)

    preprocess_point = time.time()
    print('Data dump took {}'.format(preprocess_point - start))

    # Stage 2 (optional): remove extra background regions from the mask.
    if adjust_mask:
        # NOTE: aliased on import so the 'adjust_mask' flag parameter is not
        # shadowed by the helper function of the same name.
        from pathflowai.utils import adjust_mask as adjust_mask_fn
        adj_dir = join(input_dir, 'adjusted_masks')
        adj_npy = join(adj_dir, os.path.basename(npy_mask))
        os.makedirs(adj_dir, exist_ok=True)
        if not os.path.exists(adj_npy):  # reuse a previously adjusted mask
            adjust_mask_fn(npy_mask, out_zarr if not no_zarr else svs_file, adj_npy, n_neighbors)
    adjust_point = time.time()
    print('Adjust took {}'.format(adjust_point - preprocess_point))

    # Stage 3 (optional): export patch-level info to SQL; table name is the patch size.
    if patches:
        generate_patch_pipeline(basename,
                            input_dir=input_dir,
                            annotations=annotations,
                            threshold=threshold,
                            patch_size=patch_size,
                            out_db=out_db,
                            generate_finetune_segmentation=generate_finetune_segmentation,
                            target_class=target_segmentation_class,
                            intensity_threshold=intensity_threshold,
                            target_threshold=target_threshold,
                            adj_mask=adj_npy,
                            basic_preprocess=basic_preprocess,
                            entire_image=entire_image,
                            svs_file=svs_file,
                            transpose_annotations=transpose_annotations,
                            get_tissue_mask=get_tissue_mask,
                            otsu=otsu,
                            compression=compression,
                            return_convex_hull=return_convex_hull,
                            keep_holes=keep_holes,
                            max_hole_size=max_hole_size,
                            gray_before_close=gray_before_close,
                            kernel=kernel,
                            min_object_size=min_object_size,
                            blur_size=blur_size)
    patch_point = time.time()
    print('Patches took {}'.format(patch_point - adjust_point))
@preprocessing.command()
@click.option('-i', '--mask_dir', default='./inputs/', help='Input directory for masks.', type=click.Path(exists=False), show_default=True)
@click.option('-o', '--output_dir', default='./outputs/', help='Output directory for new masks.', type=click.Path(exists=False), show_default=True)
@click.option('-fr', '--from_annotations', default=[], multiple=True, help='Annotations to switch from.', show_default=True)
@click.option('-to', '--to_annotations', default=[], multiple=True, help='Annotations to switch to.', show_default=True)
def alter_masks(mask_dir, output_dir, from_annotations, to_annotations):
    """Map list of values to other values in mask."""
    import glob
    from pathflowai.utils import npy2da
    import numpy as np
    from dask.distributed import Client
    assert len(from_annotations) == len(to_annotations), 'Must pass the same number of -fr and -to values'
    from_annotations = list(map(int, from_annotations))
    to_annotations = list(map(int, to_annotations))
    os.makedirs(output_dir, exist_ok=True)
    from_to = list(zip(from_annotations, to_annotations))
    # BUGFIX: the dask client was created but never closed; the context manager
    # guarantees shutdown even if a mask fails to process.
    with Client():
        for mask in glob.glob(join(mask_dir, '*_mask.npy')):
            output_mask = join(output_dir, os.path.basename(mask))
            arr = npy2da(mask)  # lazy dask array over the saved mask
            for fr, to in from_to:
                # Order of the (from, to) pairs matters: later remaps see the
                # results of earlier ones.
                arr[arr == fr] = to
            np.save(output_mask, arr.compute())
@preprocessing.command()
@click.option('-i', '--input_patch_db', default='patch_info_input.db', help='Input db.', type=click.Path(exists=False), show_default=True)
@click.option('-o', '--output_patch_db', default='patch_info_output.db', help='Output db.', type=click.Path(exists=False), show_default=True)
@click.option('-b', '--basename', default='A01', help='Basename.', type=click.Path(exists=False), show_default=True)
@click.option('-ps', '--patch_size', default=224, help='Patch size.',  show_default=True)
def remove_basename_from_db(input_patch_db, output_patch_db, basename, patch_size):
    """Removes basename/ID from SQL DB."""
    import sqlite3
    import pandas as pd
    # BUGFIX: the old code used output_patch_db[:output_patch_db.rfind('/')],
    # which chops the last character off a bare filename (rfind returns -1)
    # and created a bogus directory. os.path.dirname handles both cases.
    out_dir = os.path.dirname(output_patch_db)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    # Patch tables are named after their patch size.
    conn = sqlite3.connect(input_patch_db)
    try:
        df = pd.read_sql('select * from "{}";'.format(patch_size), con=conn)
    finally:
        conn.close()
    # Drop every patch row belonging to the given slide basename/ID.
    df = df.loc[df['ID'] != basename]
    conn = sqlite3.connect(output_patch_db)
    try:
        df.set_index('index').to_sql(str(patch_size), con=conn, if_exists='replace')
    finally:
        conn.close()
@preprocessing.command()
@click.option('-i', '--input_patch_db', default='patch_info_input.db', help='Input db.', type=click.Path(exists=False), show_default=True)
@click.option('-o', '--output_patch_db', default='patch_info_output.db', help='Output db.', type=click.Path(exists=False), show_default=True)
@click.option('-fr', '--from_annotations', default=[], multiple=True, help='Annotations to switch from.', show_default=True)
@click.option('-to', '--to_annotations', default=[], multiple=True, help='Annotations to switch to.', show_default=True)
@click.option('-ps', '--patch_size', default=224, help='Patch size.',  show_default=True)
@click.option('-rb', '--remove_background_annotation', default='', help='If selected, removes 100\% background patches based on this annotation.', type=click.Path(exists=False), show_default=True)
@click.option('-ma', '--max_background_area', default=0.05, help='Max background area before exclusion.',  show_default=True)
def collapse_annotations(input_patch_db, output_patch_db, from_annotations, to_annotations, patch_size, remove_background_annotation, max_background_area):
    """Adds annotation classes areas to other annotation classes in SQL DB when getting rid of some annotation classes."""
    import sqlite3
    import numpy as np, pandas as pd
    assert len(from_annotations) == len(to_annotations), 'Must pass the same number of -fr and -to values'
    from_annotations = list(map(str, from_annotations))
    to_annotations = list(map(str, to_annotations))
    # BUGFIX: was output_patch_db[:output_patch_db.rfind('/')], which mangles a
    # bare filename (rfind returns -1 and slices off the last character).
    out_dir = os.path.dirname(output_patch_db)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    conn = sqlite3.connect(input_patch_db)
    df = pd.read_sql('select * from "{}";'.format(patch_size), con=conn)
    conn.close()
    from_to = zip(from_annotations, to_annotations)
    # Optionally drop patches whose background annotation area exceeds the cap.
    if remove_background_annotation:
        df = df.loc[df[remove_background_annotation] <= (1. - max_background_area)]
    # Fold each removed class's area into its target class, then drop the
    # source columns.
    for fr, to in from_to:
        df.loc[:, to] += df[fr]
    df = df[[col for col in list(df) if col not in from_annotations]]
    # NOTE(review): columns 6+ are assumed to be the annotation-area columns --
    # this depends on the schema written by the patch pipeline; verify if the
    # schema changes. Classes are renumbered 0..k-1 after collapsing.
    annotations = list(df.iloc[:, 6:])
    df = df.rename(columns={annot: str(i) for i, annot in enumerate(annotations)})
    annotations = list(df.iloc[:, 6:])
    # Re-label each patch with the class holding the largest area.
    df.loc[:, 'annotation'] = np.vectorize(lambda i: annotations[df.iloc[i, 6:].values.argmax()])(np.arange(df.shape[0]))
    df.loc[:, 'index'] = np.arange(df.shape[0])
    conn = sqlite3.connect(output_patch_db)
    df.set_index('index').to_sql(str(patch_size), con=conn, if_exists='replace')
    conn.close()
if __name__ == '__main__':
    from dask.distributed import Client
    # NOTE(review): these config keys mix '_' and '-' separators; dask's
    # canonical keys use '-'. Confirm they actually take effect.
    dask.config.set({'temporary_dir': 'tmp/',
                    'distributed.worker.local_dir': 'tmp/',
                    'distributed.scheduler.allowed-failures': 20})
    c = Client(processes=False)
    try:
        preprocessing()
    finally:
        # BUGFIX: click's standalone mode exits via SystemExit, so without the
        # finally the client was never closed.
        c.close()