|
a |
|
b/pathflowai/cli_preprocessing.py |
|
|
1 |
import argparse |
|
|
2 |
import os |
|
|
3 |
from os.path import join |
|
|
4 |
from pathflowai.utils import run_preprocessing_pipeline, generate_patch_pipeline, img2npy_, create_zero_mask |
|
|
5 |
import click |
|
|
6 |
import dask |
|
|
7 |
import time |
|
|
8 |
|
|
|
9 |
# Shared click settings: allow -h as a help alias and widen help-text wrapping to 90 cols.
CONTEXT_SETTINGS = dict(help_option_names=['-h','--help'], max_content_width=90)

@click.group(context_settings= CONTEXT_SETTINGS)
@click.version_option(version='0.1')
def preprocessing():
    """Root command group for the preprocessing CLI; subcommands register via @preprocessing.command()."""
    pass
|
|
15 |
|
|
|
16 |
def output_if_exists(filename):
    """Return *filename* when the path exists on disk, otherwise ``None``.

    Parameters
    ----------
    filename : str
        File in question.

    Returns
    -------
    str
        Filename, or ``None`` if no such path exists.
    """
    return filename if os.path.exists(filename) else None
|
|
34 |
|
|
|
35 |
@preprocessing.command()
@click.option('-npy', '--img2npy', is_flag=True, help='Image to numpy for faster read.', show_default=True)
@click.option('-b', '--basename', default='A01', help='Basename of patches.', type=click.Path(exists=False), show_default=True)
@click.option('-i', '--input_dir', default='./inputs/', help='Input directory for patches.', type=click.Path(exists=False), show_default=True)
@click.option('-a', '--annotations', default=[], multiple=True, help='Annotations in image in order.', type=click.Path(exists=False), show_default=True)
@click.option('-pr', '--preprocess', is_flag=True, help='Run preprocessing pipeline.', show_default=True)
@click.option('-pa', '--patches', is_flag=True, help='Add patches to SQL.', show_default=True)
@click.option('-t', '--threshold', default=0.05, help='Threshold to remove non-purple slides.', show_default=True)
@click.option('-ps', '--patch_size', default=224, help='Patch size.', show_default=True)
@click.option('-it', '--intensity_threshold', default=100., help='Intensity threshold to rate a pixel as non-white.', show_default=True)
@click.option('-g', '--generate_finetune_segmentation', is_flag=True, help='Generate patches for one segmentation mask class for targeted finetuning.', show_default=True)
@click.option('-tc', '--target_segmentation_class', default=0, help='Segmentation Class to finetune on, output patches to another db.', show_default=True)
@click.option('-tt', '--target_threshold', default=0., help='Threshold to include target for segmentation if saving one class.', show_default=True)
@click.option('-odb', '--out_db', default='./patch_info.db', help='Output patch database.', type=click.Path(exists=False), show_default=True)
@click.option('-am', '--adjust_mask', is_flag=True, help='Remove additional background regions from annotation mask.', show_default=True)
@click.option('-nn', '--n_neighbors', default=5, help='If adjusting mask, number of neighbors connectivity to remove.', show_default=True)
@click.option('-bp', '--basic_preprocess', is_flag=True, help='Basic preprocessing pipeline, annotation areas are not saved. Used for benchmarking tool against comparable pipelines', show_default=True)
@click.option('-ei', '--entire_image', is_flag=True, help='Store entire image in central db rather than patches.', show_default=True)
@click.option('-nz', '--no_zarr', is_flag=True, help='Don\'t save zarr format file.', show_default=True)
@click.option('-pka', '--pkl_annot', is_flag=True, help='Look for .annot.pkl pickle files instead of xml annotations.', show_default=True)
@click.option('-ta', '--transpose_annotations', is_flag=True, help='Transpose annotations.', show_default=True)
@click.option('-gtm', '--get_tissue_mask', is_flag=True, help='Build tissue mask instead of intensity thresholding.', show_default=True)
@click.option('-ot', '--otsu', is_flag=True, help='Utilize otsu method to decide intensity threshold.', show_default=True)
@click.option('-cm', '--compression', default=8., help='If find tissue mask, how much to downsample image.', show_default=True)
@click.option('-ch', '--return_convex_hull', is_flag=True, help='Return convex hull of tissue mask.', show_default=True)
@click.option('-kh', '--keep_holes', is_flag=True, help='Keep holes tissue mask.', show_default=True)
@click.option('-mhs', '--max_hole_size', default=0, help='If removing holes, what is maximum allowed size to remain.', show_default=True)
@click.option('-gbc', '--gray_before_close', is_flag=True, help='Filter grays before binary closing operation.', show_default=True)
@click.option('-kl', '--kernel', default=61, help='Binary closing kernel.', show_default=True)
@click.option('-mos', '--min_object_size', default=100000, help='Remove all connected components smaller than this size.', show_default=True)
@click.option('-bs', '--blur_size', default=0, help='How much to blur tissue mask.', show_default=True)
def preprocess_pipeline(img2npy,basename,input_dir,annotations,preprocess,patches,threshold,patch_size, intensity_threshold, generate_finetune_segmentation, target_segmentation_class, target_threshold, out_db, adjust_mask, n_neighbors, basic_preprocess, entire_image, no_zarr, pkl_annot, transpose_annotations,get_tissue_mask,otsu,compression,return_convex_hull, keep_holes, max_hole_size, gray_before_close, kernel, min_object_size, blur_size):
    """Preprocessing pipeline that accomplishes 3 things. 1: storage into ZARR format, 2: optional mask adjustment, 3: storage of patch-level information into SQL DB"""
    # Locate the slide image by probing supported extensions in priority order.
    svs_file = None
    for ext in ['.npy','.svs','.tiff','.tif', '.vms', '.vmu', '.ndpi', '.scn', '.mrxs', '.svslide', '.bif', '.jpeg', '.png', '.h5']:
        svs_file = output_if_exists(join(input_dir,'{}{}'.format(basename,ext)))
        if svs_file is not None:
            break
    if svs_file is None:
        # Original fell through with svs_file=None and crashed later with an
        # AttributeError on svs_file.endswith; fail fast with a clear message.
        raise FileNotFoundError('No image found for basename {} in {}'.format(basename, input_dir))

    # Optionally convert the slide to .npy for faster reads on later passes.
    if img2npy and not svs_file.endswith('.npy'):
        svs_file = img2npy_(input_dir, basename, svs_file)

    # Annotation sources: XML (or pickled annotations) and/or a precomputed npy mask.
    xml_file = output_if_exists(join(input_dir,'{}{}'.format(basename,".xml" if not pkl_annot else ".annot.pkl")))
    npy_mask = output_if_exists(join(input_dir,'{}_mask.npy'.format(basename)))
    out_zarr = join(input_dir,'{}.zarr'.format(basename))
    out_pkl = join(input_dir,'{}_mask.pkl'.format(basename))
    adj_npy = ''  # path of adjusted mask; empty string means "no adjusted mask"

    start = time.time()
    # Stage 1: dump image (and annotations) to ZARR / pickle.
    if preprocess:
        run_preprocessing_pipeline(svs_file=svs_file,
                                   xml_file=xml_file,
                                   npy_mask=npy_mask,
                                   annotations=annotations,
                                   out_zarr=out_zarr,
                                   out_pkl=out_pkl,
                                   no_zarr=no_zarr,
                                   transpose_annotations=transpose_annotations)

    # No annotation source at all: synthesize an all-zero mask and force
    # single-class finetune-segmentation mode on class 1.
    if npy_mask is None and xml_file is None:
        print('Generating Zero Mask')
        npy_mask = join(input_dir,'{}_mask.npz'.format(basename))
        target_segmentation_class = 1
        generate_finetune_segmentation = True
        create_zero_mask(npy_mask, out_zarr if not no_zarr else svs_file, out_pkl)

    preprocess_point = time.time()
    print('Data dump took {}'.format(preprocess_point-start))

    # Stage 2 (optional): remove residual background components from the mask.
    if adjust_mask:
        from pathflowai.utils import adjust_mask  # shadows the CLI flag; flag already consumed
        adj_dir = join(input_dir,'adjusted_masks')
        adj_npy = join(adj_dir, os.path.basename(npy_mask))
        os.makedirs(adj_dir, exist_ok=True)
        if not os.path.exists(adj_npy):
            adjust_mask(npy_mask, out_zarr if not no_zarr else svs_file, adj_npy, n_neighbors)
    # Timed unconditionally so the patch-stage timing below always has a reference point.
    adjust_point = time.time()
    print('Adjust took {}'.format(adjust_point-preprocess_point))

    # Stage 3 (optional): export patch-level info to SQL (table name is the patch size).
    if patches: # ADD EXPORT TO SQL, TABLE NAME IS PATCH SIZE
        generate_patch_pipeline(basename,
                                input_dir=input_dir,
                                annotations=annotations,
                                threshold=threshold,
                                patch_size=patch_size,
                                out_db=out_db,
                                generate_finetune_segmentation=generate_finetune_segmentation,
                                target_class=target_segmentation_class,
                                intensity_threshold=intensity_threshold,
                                target_threshold=target_threshold,
                                adj_mask=adj_npy,
                                basic_preprocess=basic_preprocess,
                                entire_image=entire_image,
                                svs_file=svs_file,
                                transpose_annotations=transpose_annotations,
                                get_tissue_mask=get_tissue_mask,
                                otsu=otsu,
                                compression=compression,
                                return_convex_hull=return_convex_hull,
                                keep_holes=keep_holes,
                                max_hole_size=max_hole_size,
                                gray_before_close=gray_before_close,
                                kernel=kernel,
                                min_object_size=min_object_size,
                                blur_size=blur_size)
        # Report inside the branch so patch_point is never referenced when patches is off.
        patch_point = time.time()
        print('Patches took {}'.format(patch_point-adjust_point))
|
|
145 |
|
|
|
146 |
@preprocessing.command()
@click.option('-i', '--mask_dir', default='./inputs/', help='Input directory for masks.', type=click.Path(exists=False), show_default=True)
@click.option('-o', '--output_dir', default='./outputs/', help='Output directory for new masks.', type=click.Path(exists=False), show_default=True)
@click.option('-fr', '--from_annotations', default=[], multiple=True, help='Annotations to switch from.', show_default=True)
@click.option('-to', '--to_annotations', default=[], multiple=True, help='Annotations to switch to.', show_default=True)
def alter_masks(mask_dir, output_dir, from_annotations, to_annotations):
    """Map list of values to other values in mask."""
    import glob
    from pathflowai.utils import npy2da
    import numpy as np
    from dask.distributed import Client
    # Each source value must pair with exactly one replacement value.
    assert len(from_annotations) == len(to_annotations)
    from_annotations = list(map(int, from_annotations))
    to_annotations = list(map(int, to_annotations))
    os.makedirs(output_dir, exist_ok=True)
    masks = glob.glob(join(mask_dir, '*_mask.npy'))
    from_to = list(zip(from_annotations, to_annotations))
    # Scope the dask client to the remap work; the original leaked it (never closed).
    with Client() as c:
        for mask in masks:
            output_mask = join(output_dir, os.path.basename(mask))
            arr = npy2da(mask)
            for fr, to in from_to:
                arr[arr == fr] = to
            np.save(output_mask, arr.compute())
|
|
170 |
|
|
|
171 |
@preprocessing.command()
@click.option('-i', '--input_patch_db', default='patch_info_input.db', help='Input db.', type=click.Path(exists=False), show_default=True)
@click.option('-o', '--output_patch_db', default='patch_info_output.db', help='Output db.', type=click.Path(exists=False), show_default=True)
@click.option('-b', '--basename', default='A01', help='Basename.', type=click.Path(exists=False), show_default=True)
@click.option('-ps', '--patch_size', default=224, help='Patch size.', show_default=True)
def remove_basename_from_db(input_patch_db, output_patch_db, basename, patch_size):
    """Removes basename/ID from SQL DB."""
    import sqlite3
    import numpy as np, pandas as pd
    # Original used output_patch_db[:output_patch_db.rfind('/')], which on a bare
    # filename (rfind == -1) chopped the last character and created a bogus directory.
    out_dir = os.path.dirname(output_patch_db)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    conn = sqlite3.connect(input_patch_db)
    try:
        # Table name is the patch size (see preprocess_pipeline's SQL export).
        df = pd.read_sql('select * from "{}";'.format(patch_size), con=conn)
    finally:
        conn.close()
    # Drop every patch row belonging to the given slide ID.
    df = df.loc[df['ID'] != basename]
    conn = sqlite3.connect(output_patch_db)
    try:
        df.set_index('index').to_sql(str(patch_size), con=conn, if_exists='replace')
    finally:
        conn.close()
|
|
188 |
|
|
|
189 |
|
|
|
190 |
@preprocessing.command()
@click.option('-i', '--input_patch_db', default='patch_info_input.db', help='Input db.', type=click.Path(exists=False), show_default=True)
@click.option('-o', '--output_patch_db', default='patch_info_output.db', help='Output db.', type=click.Path(exists=False), show_default=True)
@click.option('-fr', '--from_annotations', default=[], multiple=True, help='Annotations to switch from.', show_default=True)
@click.option('-to', '--to_annotations', default=[], multiple=True, help='Annotations to switch to.', show_default=True)
@click.option('-ps', '--patch_size', default=224, help='Patch size.', show_default=True)
@click.option('-rb', '--remove_background_annotation', default='', help='If selected, removes 100\% background patches based on this annotation.', type=click.Path(exists=False), show_default=True)
@click.option('-ma', '--max_background_area', default=0.05, help='Max background area before exclusion.', show_default=True)
def collapse_annotations(input_patch_db, output_patch_db, from_annotations, to_annotations, patch_size, remove_background_annotation, max_background_area):
    """Adds annotation classes areas to other annotation classes in SQL DB when getting rid of some annotation classes."""
    import sqlite3
    import numpy as np, pandas as pd
    # Each collapsed class must pair with exactly one destination class.
    assert len(from_annotations) == len(to_annotations)
    from_annotations = list(map(str, from_annotations))
    to_annotations = list(map(str, to_annotations))
    # Original used output_patch_db[:output_patch_db.rfind('/')], which on a bare
    # filename (rfind == -1) chopped the last character and created a bogus directory.
    out_dir = os.path.dirname(output_patch_db)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    conn = sqlite3.connect(input_patch_db)
    try:
        df = pd.read_sql('select * from "{}";'.format(patch_size), con=conn)
    finally:
        conn.close()
    from_to = zip(from_annotations, to_annotations)
    # Optionally drop patches that are (almost) entirely background.
    if remove_background_annotation:
        df = df.loc[df[remove_background_annotation] <= (1. - max_background_area)]
    # Fold each source class's area into its destination class, then drop the source columns.
    for fr, to in from_to:
        df.loc[:, to] += df[fr]
    df = df[[col for col in list(df) if col not in from_annotations]]
    # Columns from position 6 onward are per-class annotation areas; renumber them 0..k-1.
    # NOTE(review): the first-6-columns-are-metadata layout is assumed from the original
    # code — confirm against the patch DB schema written by generate_patch_pipeline.
    annotations = list(df.iloc[:, 6:])
    df = df.rename(columns={annot: str(i) for i, annot in enumerate(annotations)})
    annotations = list(df.iloc[:, 6:])
    # Dominant class per patch = argmax over the annotation-area columns.
    df.loc[:, 'annotation'] = np.vectorize(lambda i: annotations[df.iloc[i, 6:].values.argmax()])(np.arange(df.shape[0]))
    df.loc[:, 'index'] = np.arange(df.shape[0])
    conn = sqlite3.connect(output_patch_db)
    try:
        df.set_index('index').to_sql(str(patch_size), con=conn, if_exists='replace')
    finally:
        conn.close()
|
|
224 |
|
|
|
225 |
|
|
|
226 |
if __name__ == '__main__':
    from dask.distributed import Client
    dask.config.set({'temporary_dir': 'tmp/',
                     'distributed.worker.local_dir': 'tmp/',
                     'distributed.scheduler.allowed-failures': 20})#'distributed.worker.num-workers':20}):
    c = Client(processes=False)
    try:
        # click raises SystemExit in standalone mode even on success, so the
        # original's unguarded c.close() after this call was unreachable.
        preprocessing()
    finally:
        c.close()