|
a |
|
b/pathaia/patches/functional_api.py |
|
|
1 |
# coding: utf8 |
|
|
2 |
""" |
|
|
3 |
A module to extract patches in a slide. |
|
|
4 |
|
|
|
5 |
Enable filtering on tissue surface ratio. |
|
|
6 |
Draft for hierarchical patch extraction and representation is proposed. |
|
|
7 |
""" |
|
|
8 |
import warnings |
|
|
9 |
import numpy |
|
|
10 |
import openslide |
|
|
11 |
from ..util.paths import slides_in_folder, slide_basename, safe_rmtree, get_files |
|
|
12 |
from ..util.images import regular_grid, get_coords_from_mask |
|
|
13 |
from ..util.basic import ifnone |
|
|
14 |
from ..util.types import ( |
|
|
15 |
Filter, |
|
|
16 |
FilterList, |
|
|
17 |
PathLike, |
|
|
18 |
NDImage, |
|
|
19 |
NDBoolMask, |
|
|
20 |
Coord, |
|
|
21 |
Patch, |
|
|
22 |
Slide, |
|
|
23 |
) |
|
|
24 |
from .visu import preview_from_queries |
|
|
25 |
from .filters import ( |
|
|
26 |
filter_hasdapi, |
|
|
27 |
filter_has_significant_dapi, |
|
|
28 |
filter_has_tissue_he, |
|
|
29 |
standardize_filters, |
|
|
30 |
) |
|
|
31 |
from .slide_filters import filter_thumbnail, filter_fluo_thumbnail |
|
|
32 |
from .compat import convert_coords |
|
|
33 |
import os |
|
|
34 |
import csv |
|
|
35 |
from skimage.io import imsave |
|
|
36 |
from tqdm import tqdm |
|
|
37 |
from .errors import UnknownFilterError, InvalidArgument |
|
|
38 |
from typing import Optional, Sequence, Tuple, Iterator, Dict |
|
|
39 |
from ..util.convert import ( |
|
|
40 |
get_categorical_segments_from_edges, |
|
|
41 |
get_categorical_layer_edges, |
|
|
42 |
layer_segment_to_json_struct, |
|
|
43 |
gen_categorical_from_floatpreds, |
|
|
44 |
colorCycle, |
|
|
45 |
) |
|
|
46 |
import json |
|
|
47 |
from fastcore.basics import listify |
|
|
48 |
|
|
|
49 |
|
|
|
50 |
# Registry of named per-patch filters. `filter_image` looks a string filter up
# here and calls the associated function on the patch image.
izi_filters = {
    "has-dapi": filter_hasdapi,
    "has-significant-dapi": filter_has_significant_dapi,
    "has-tissue-he": filter_has_tissue_he,
}

# Registry of named thumbnail filters. `apply_slide_filters` looks a string
# filter up here and calls the associated function on the slide thumbnail.
slide_filters = {"full": filter_thumbnail, "fluo": filter_fluo_thumbnail}
|
|
57 |
|
|
|
58 |
|
|
|
59 |
def filter_image(image: NDImage, filters: Sequence[Filter]) -> bool:
    """
    Check multiple filters on an image.

    Filters are evaluated in the given order and evaluation stops at the
    first filter that rejects the image.

    Args:
        image: the patch to be filtered.
        filters: functions that turn images into booleans, or names of filters
            registered in ``izi_filters``.

    Returns:
        Whether the image has passed every filter (True for an empty list).

    Raises:
        UnknownFilterError: if a filter is neither a callable nor a name
            registered in ``izi_filters``.

    """
    for filt in filters:
        if callable(filt):
            check = filt
        # isinstance (rather than an exact type comparison) also accepts str
        # subclasses, matching what apply_slide_filters does.
        elif isinstance(filt, str) and filt in izi_filters:
            check = izi_filters[filt]
        else:
            raise UnknownFilterError("{} is not a valid filter !!!".format(filt))
        if not check(image):
            return False
    return True
|
|
83 |
|
|
|
84 |
|
|
|
85 |
def apply_slide_filters(thumb: NDImage, filters: Sequence[Filter]) -> NDBoolMask:
    """
    Combine the masks produced by every filter on the input thumbnail.

    The result starts as an all-True mask over the first two dimensions of
    the thumbnail and is and-ed with the output of each filter in turn.

    Args:
        thumb: thumbnail to compute mask from.
        filters: list of filters to apply to thumb, given either as callables
            or as names registered in ``slide_filters``. Each filter should
            output a boolean mask with same dimensions as thumb.

    Returns:
        Boolean mask where tissue pixels are True.

    Raises:
        UnknownFilterError: if a filter name is not registered in
            ``slide_filters``.

    """
    plane_shape = thumb.shape[:2]
    combined = numpy.ones(plane_shape, dtype=bool)
    for entry in filters:
        func = entry
        if isinstance(entry, str):
            # Names are resolved lazily, just before they are applied.
            if entry not in slide_filters:
                raise UnknownFilterError("{} is not a valid filter !!!".format(entry))
            func = slide_filters[entry]
        combined = combined & func(thumb)
    return combined
|
|
106 |
|
|
|
107 |
|
|
|
108 |
def slide_rois(
    slide: Slide,
    level: int,
    psize: Coord,
    interval: Coord = (0, 0),
    ancestors: Optional[Sequence[Patch]] = None,
    offset: Coord = (0, 0),
    filters: Optional[Sequence[Filter]] = None,
    thumb_size: int = 512,
    slide_filters: Optional[Sequence[Filter]] = None,
) -> Iterator[Tuple[Patch, NDImage]]:
    """
    Get patches with coordinates.

    Given a slide, a pyramid level, a patchsize in pixels, an interval in pixels
    and an offset in pixels, get patches with its coordinates.

    When ``ancestors`` is non-empty, a regular grid is laid inside every
    ancestor patch; ``offset``, ``thumb_size`` and ``slide_filters`` are not
    used in that mode. Otherwise candidate positions come from the thumbnail
    mask computed with ``slide_filters``. Tiles that raise an OpenSlideError
    while being read are reported on stdout and skipped.

    Args:
        slide: the slide to patchify.
        level: pyramid level.
        psize: (w, h) size of the patches (in pixels).
        interval: (x, y) interval between 2 neighboring patches.
        ancestors: patches that contain upcoming patches.
        offset: (x, y) offset in px on x and y axis for patch start.
        filters: filters to accept patches.
        thumb_size: size of thumbnail's longest side. Always preserves aspect ratio.
        slide_filters: list of filters to apply to thumbnail. Should output boolean mask.

    Yields:
        A tuple containing a Patch object and the corresponding image as
        ndarray.

    """
    psize = convert_coords(psize)
    offset = convert_coords(offset)
    ancestors = ifnone(ancestors, [])
    filters = listify(filters)
    slide_filters = listify(slide_filters)
    mag = slide.level_downsamples[level]
    # Patch footprint at level 0; identical for every patch of this call.
    size_0 = psize * mag
    if len(ancestors) > 0:
        # The grid shape is taken from the first ancestor only.
        # NOTE(review): presumably all ancestors share the same size_0 - confirm.
        shape = Coord(ancestors[0].size_0) / mag
        patches = []
        for ancestor in ancestors:
            # ancestor is a patch
            prefix = ancestor.id
            for k, patch_coord in enumerate(
                regular_grid(shape, interval, psize), start=1
            ):
                patches.append(
                    Patch(
                        id="{}#{}".format(prefix, k),
                        # basename copes with any path separator and with
                        # path-like objects, unlike a split on "/".
                        slidename=os.path.basename(slide._filename),
                        position=patch_coord * mag + ancestor.position,
                        level=level,
                        size=psize,
                        size_0=size_0,
                        parent=ancestor,
                    )
                )
        for patch in tqdm(patches, ascii=True):
            try:
                image = slide.read_region(patch.position, patch.level, patch.size)
                # Keep the first three channels only (drops alpha if present).
                image = numpy.array(image)[:, :, 0:3]
                if filter_image(image, filters):
                    yield patch, image
            except openslide.lowlevel.OpenSlideError:
                print(
                    "small failure while reading tile x={}, y={} in {}".format(
                        *patch.position, slide._filename
                    )
                )
    else:
        shape = Coord(*slide.level_dimensions[level])
        thumb = numpy.array(slide.get_thumbnail((thumb_size, thumb_size)))
        mask = apply_slide_filters(thumb, slide_filters)
        coords = get_coords_from_mask(mask, shape, interval, psize)
        for k, patch_coord in enumerate(coords, start=1):
            position = patch_coord * mag + offset
            try:
                image = slide.read_region(position, level, psize)
                # Keep the first three channels only (drops alpha if present).
                image = numpy.array(image)[:, :, 0:3]
                if filter_image(image, filters):
                    yield Patch(
                        id="#{}".format(k),
                        slidename=os.path.basename(slide._filename),
                        position=position,
                        level=level,
                        size=psize,
                        size_0=size_0,
                    ), image
            except openslide.lowlevel.OpenSlideError:
                print(
                    "small failure while reading tile x={}, y={} in {}".format(
                        *position, slide._filename
                    )
                )
|
|
211 |
|
|
|
212 |
|
|
|
213 |
def slide_rois_no_image(
    slide: Slide,
    level: int,
    psize: Coord,
    interval: Coord = (0, 0),
    ancestors: Optional[Sequence[Patch]] = None,
    offset: Coord = (0, 0),
    thumb_size: int = 512,
    slide_filters: Optional[Sequence[Filter]] = None,
) -> Iterator[Patch]:
    """
    Get patches with coordinates, without reading any image.

    Given a slide, a pyramid level, a patchsize in pixels, an interval in pixels
    and an offset in pixels, get patches with its coordinates. Does not export image at
    any point.

    When ``ancestors`` is non-empty, a regular grid is laid inside every
    ancestor patch; ``offset``, ``thumb_size`` and ``slide_filters`` are not
    used in that mode. Otherwise candidate positions come from the thumbnail
    mask computed with ``slide_filters``.

    Args:
        slide: the slide to patchify.
        level: pyramid level.
        psize: (w, h) size of the patches (in pixels).
        interval: (x, y) interval between 2 neighboring patches.
        ancestors: patches that contain upcoming patches.
        offset: (x, y) offset in px on x and y axis for patch start.
        thumb_size: size of thumbnail's longest side. Always preserves aspect ratio.
        slide_filters: list of filters to apply to thumbnail. Should output boolean mask.

    Yields:
        Patch objects (no image is attached).

    """
    psize = convert_coords(psize)
    offset = convert_coords(offset)
    ancestors = ifnone(ancestors, [])
    slide_filters = listify(slide_filters)
    mag = slide.level_downsamples[level]
    # Patch footprint at level 0; identical for every patch of this call.
    size_0 = psize * mag
    if len(ancestors) > 0:
        # The grid shape is taken from the first ancestor only.
        # NOTE(review): presumably all ancestors share the same size_0 - confirm.
        shape = Coord(ancestors[0].size_0) / mag
        for ancestor in ancestors:
            # ancestor is a patch
            prefix = ancestor.id
            for k, patch_coord in enumerate(
                regular_grid(shape, interval, psize), start=1
            ):
                yield Patch(
                    id="{}#{}".format(prefix, k),
                    # basename copes with any path separator and with
                    # path-like objects, unlike a split on "/".
                    slidename=os.path.basename(slide._filename),
                    position=patch_coord * mag + ancestor.position,
                    level=level,
                    size=psize,
                    size_0=size_0,
                    parent=ancestor,
                )
    else:
        shape = Coord(*slide.level_dimensions[level])
        thumb = numpy.array(slide.get_thumbnail((thumb_size, thumb_size)))
        mask = apply_slide_filters(thumb, slide_filters)
        coords = get_coords_from_mask(mask, shape, interval, psize)
        for k, patch_coord in enumerate(coords, start=1):
            yield Patch(
                id="#{}".format(k),
                slidename=os.path.basename(slide._filename),
                position=patch_coord * mag + offset,
                level=level,
                size=psize,
                size_0=size_0,
            )
|
|
289 |
|
|
|
290 |
|
|
|
291 |
def patchify_slide(
    slidefile: PathLike,
    outdir: PathLike,
    level: int,
    psize: Coord,
    interval: Coord = (0, 0),
    offset: Coord = (0, 0),
    filters: Optional[Sequence[Filter]] = None,
    erase_tree: Optional[bool] = None,
    thumb_size: int = 512,
    slide_filters: Optional[Sequence[Filter]] = None,
    verbose: int = 2,
    silence: int = 0,
    backend: str = "openslide",
):
    """
    Save patches of a given wsi.

    Patches are written as PNG files in a ``level_<level>`` sub-folder of the
    slide output folder and their metadata in a ``patches.csv`` file at the
    root of that folder.

    Args:
        slidefile: abs path to slide file.
        outdir: abs path to an output folder.
        level: pyramid level.
        psize: (w, h) size of the patches (in pixels).
        interval: (x, y) interval between 2 neighboring patches.
        offset: (x, y) offset in px on x and y axis for patch start.
        filters: filters to accept patches.
        erase_tree: whether to erase outfolder if it exists. If None, user will be
            prompted for a choice.
        thumb_size: size of thumbnail's longest side. Always preserves aspect ratio.
        slide_filters: list of filters to apply to thumbnail. Should output boolean
            mask.
        verbose: 0 => nada, 1 => patchifying parameters, 2 => start-end of
            processes, thumbnail export.
        silence: 0 => write images, 1 => use images, but do not write, 2 => not even
            using images for filtering.
        backend: whether to use openslide or cucim as backend.

    Raises:
        InvalidArgument: if ``silence`` is not one of 0, 1 or 2.

    """
    # Get name of the slide
    slide_id = slide_basename(slidefile)
    # if output directory has the same name, it's ok
    if os.path.basename(outdir) == slide_id:
        slide_folder_output = outdir
    # otherwise, create a subfolder with the name of the slide
    else:
        slide_folder_output = os.path.join(outdir, slide_id)
    if os.path.isdir(slide_folder_output):
        # Remember the choice so that it is reused for the level folder below.
        erase_tree = safe_rmtree(
            slide_folder_output, ignore_errors=True, erase_tree=erase_tree
        )
    os.makedirs(slide_folder_output, exist_ok=True)

    if verbose > 0:
        print("patchifying: {}".format(slidefile))
        if verbose > 1:
            print("level: {}".format(level))
            print("patch-size: {}".format(psize))
            print("interval: {}".format(interval))
            print("offset: {}".format(offset))
            print("filtering: {}".format(filters))
            print("starting patchification...")
    slide = Slide(slidefile, backend=backend)
    plist = []
    # level directory
    outleveldir = os.path.join(slide_folder_output, "level_{}".format(level))
    if os.path.isdir(outleveldir):
        safe_rmtree(outleveldir, ignore_errors=True, erase_tree=erase_tree)
    os.makedirs(outleveldir, exist_ok=True)
    ########################
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        if silence in (0, 1):
            # Images are read and used for filtering; they are only written
            # to disk when silence == 0.
            for patch, img in slide_rois(
                slide,
                level,
                psize,
                interval,
                offset=offset,
                filters=filters,
                thumb_size=thumb_size,
                slide_filters=slide_filters,
            ):
                if silence == 0:
                    outfile = os.path.join(
                        outleveldir, "{}_{}_{}.png".format(*patch.position, patch.level)
                    )
                    imsave(outfile, img)
                plist.append(patch)
        elif silence == 2:
            # Do not write images and do not even use them to filter patches
            for patch in slide_rois_no_image(
                slide,
                level,
                psize,
                interval,
                offset=offset,
                thumb_size=thumb_size,
                slide_filters=slide_filters,
            ):
                plist.append(patch)
        else:
            raise InvalidArgument(
                "Invalid 'silence' parameter: '{}',"
                " should be one of [0, 1, 2]".format(silence)
            )
    if verbose > 1:
        print("end of patchification.")
        print("starting metadata csv export...")
    csv_columns = Patch.get_fields()
    csv_path = os.path.join(slide_folder_output, "patches.csv")
    # newline="" lets the csv module control line endings itself (otherwise
    # blank rows are inserted on platforms that translate "\n").
    with open(csv_path, "w", newline="") as csvfile:
        writer = csv.DictWriter(csvfile, csv_columns)
        writer.writeheader()
        writer.writerows(map(Patch.to_csv_row, plist))
    if verbose > 1:
        # The thumbnail is only exported at the highest verbosity, as stated
        # in the docstring.
        print("end of metadata export.")
        print("starting thumbnail export...")
        out_thumbnailfile = os.path.join(outleveldir, "thumbnail.png")
        thumbnail = preview_from_queries(slide, plist)
        imsave(out_thumbnailfile, thumbnail)
        print("ending thumbnail export.")
|
|
425 |
|
|
|
426 |
|
|
|
427 |
def patchify_slide_hierarchically(
    slidefile: PathLike,
    outdir: PathLike,
    top_level: int,
    low_level: int,
    psize: Coord,
    interval: Coord = (0, 0),
    offset: Coord = (0, 0),
    filters: Optional[FilterList] = None,
    silent: Optional[Sequence[int]] = None,
    erase_tree: Optional[bool] = None,
    thumb_size: int = 512,
    slide_filters: Optional[Sequence[Filter]] = None,
    verbose: int = 2,
    backend: str = "openslide",
):
    """
    Save patches of a given wsi in a hierarchical way.

    Levels are processed from ``top_level`` down to ``low_level``. The patches
    kept at one level are the ancestors of the patches extracted at the next
    one. Metadata of every level are appended to a single ``patches.csv``
    file at the root of the slide output folder.

    Args:
        slidefile: abs path to a slide file.
        outdir: abs path to an output folder.
        top_level: top pyramid level to consider.
        low_level: lowest pyramid level to consider.
        psize: (w, h) size of the patches (in pixels).
        interval: (x, y) interval between 2 neighboring patches.
        offset: (x, y) offset in px on x and y axis for patch start.
        filters: filters to accept patches.
        silent: pyramid level not to output.
        erase_tree: whether to erase outfolder if it exists. If None, user will be
            prompted for a choice.
        thumb_size: size of thumbnail's longest side. Always preserves aspect ratio.
        slide_filters: list of filters to apply to thumbnail. Should output boolean
            mask.
        verbose: 0 => nada, 1 => patchifying parameters, 2 => start-end of processes,
            thumbnail export.
        backend: whether to use openslide or cucim as backend.

    """
    filters = ifnone(filters, {})
    silent = ifnone(silent, [])
    level_filters = standardize_filters(filters, top_level, low_level)
    # Get name of the slide
    slide_id = slide_basename(slidefile)
    # if output directory has the same name, it's ok
    if os.path.basename(outdir) == slide_id:
        slide_folder_output = outdir
    # otherwise, create a subfolder with the name of the slide
    else:
        slide_folder_output = os.path.join(outdir, slide_id)
    if os.path.isdir(slide_folder_output):
        # Remember the choice so that it is reused for the level folders below.
        erase_tree = safe_rmtree(
            slide_folder_output, ignore_errors=True, erase_tree=erase_tree
        )
    os.makedirs(slide_folder_output, exist_ok=True)

    csv_columns = Patch.get_fields()
    csv_path = os.path.join(slide_folder_output, "patches.csv")

    slide = Slide(slidefile, backend=backend)

    # newline="" lets the csv module control line endings itself (otherwise
    # blank rows are inserted on platforms that translate "\n").
    with open(csv_path, "w", newline="") as csvfile:
        writer = csv.DictWriter(csvfile, csv_columns)
        writer.writeheader()
        plist = []
        for level in range(top_level, low_level - 1, -1):
            if verbose > 0:
                print("patchifying: {}".format(slidefile))
                if verbose > 1:
                    print("level: {}".format(level))
                    print("patch-size: {}".format(psize))
                    print("interval: {}".format(interval))
                    print("offset: {}".format(offset))
                    print("filtering: {}".format(level_filters[level]))
                    print("ancestors: {} patches".format(len(plist)))
                    print("starting patchification...")
            current_plist = []
            # level directory
            outleveldir = os.path.join(slide_folder_output, "level_{}".format(level))
            if os.path.isdir(outleveldir):
                safe_rmtree(outleveldir, ignore_errors=True, erase_tree=erase_tree)
            os.makedirs(outleveldir, exist_ok=True)
            ########################
            if level != top_level and not plist:
                # No patch survived the level above: there is nothing to
                # subdivide. Calling slide_rois with empty ancestors would
                # make it patchify the whole slide, breaking the hierarchy.
                rois = ()
            else:
                rois = slide_rois(
                    slide,
                    level,
                    psize,
                    interval,
                    ancestors=plist,
                    offset=offset,
                    filters=level_filters[level],
                    thumb_size=thumb_size,
                    slide_filters=slide_filters,
                )
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                for patch, img in rois:
                    # Silent levels are still computed (they are needed as
                    # ancestors) but their images are not written.
                    if level not in silent:
                        outfile = os.path.join(
                            outleveldir,
                            "{}_{}_{}.png".format(*patch.position, patch.level),
                        )
                        imsave(outfile, img)
                    current_plist.append(patch)
            # Patches of this level become the ancestors of the next one.
            plist = list(current_plist)
            if verbose > 1:
                print("end of patchification.")
                print("starting metadata csv export...")
            writer.writerows(map(Patch.to_csv_row, plist))
            if verbose > 1:
                # The thumbnail is only exported at the highest verbosity, as
                # stated in the docstring.
                print("end of metadata export.")
                print("starting thumbnail export...")
                out_thumbnailfile = os.path.join(outleveldir, "thumbnail.png")
                thumbnail = preview_from_queries(slide, current_plist)
                imsave(out_thumbnailfile, thumbnail)
                print("ending thumbnail export.")
|
|
543 |
|
|
|
544 |
|
|
|
545 |
def patchify_folder(
    infolder: str,
    outfolder: str,
    level: int,
    psize: Coord,
    interval: Coord = (0, 0),
    offset: Coord = (0, 0),
    filters: Optional[Sequence[Filter]] = None,
    extensions: Sequence[str] = (".mrxs",),
    recurse: bool = False,
    folders: Optional[Sequence[str]] = None,
    erase_tree: Optional[bool] = None,
    thumb_size: int = 512,
    slide_filters: Optional[Sequence[Filter]] = None,
    verbose: int = 2,
    silence: int = 0,
    backend: str = "openslide",
):
    """
    Save patches of all wsi inside a folder.

    Slides that raise an OpenSlide error are skipped with a warning so that a
    single unreadable file does not abort the whole folder.

    Args:
        infolder: abs path to a folder of slides.
        outfolder: abs path to an output folder.
        level: pyramid level.
        psize: (w, h) size of the patches (in pixels).
        interval: (x, y) interval between 2 neighboring patches.
        offset: (x, y) offset in px on x and y axis for patch start.
        filters: filters to accept patches.
        extensions: list of file extensions to consider. Defaults to '.mrxs'.
        recurse: whether to look for files recursively.
        folders: list of subfolders to explore when recurse is True. Defaults to all.
        erase_tree: whether to erase outfolder if it exists. If None, user will be
            prompted for a choice.
        thumb_size: size of thumbnail's longest side. Always preserves aspect ratio.
        slide_filters: list of filters to apply to thumbnail. Should output boolean
            mask.
        verbose: 0 => nada, 1 => patchifying parameters, 2 => start-end of processes,
            thumbnail export.
        silence: 0 => write images, 1 => use images, but do not write, 2 => not even
            using images for filtering.
        backend: whether to use openslide or cucim as backend.

    """
    if os.path.isdir(outfolder):
        # Remember the choice so that it is reused for every slide folder.
        erase_tree = safe_rmtree(outfolder, ignore_errors=True, erase_tree=erase_tree)
    # The file list honours `extensions`, `recurse` and `folders`; it must not
    # be replaced by a plain listing of `infolder`.
    slidefiles = get_files(
        infolder, extensions=extensions, recurse=recurse, folders=folders
    ).map(str)
    total = len(slidefiles)
    for k, slidefile in enumerate(slidefiles, start=1):
        if verbose > 0:
            print("slide {} / {}".format(k, total))
        slidename = slide_basename(slidefile)
        outdir = os.path.join(outfolder, slidename)
        if os.path.isdir(outdir):
            safe_rmtree(outdir, ignore_errors=True, erase_tree=erase_tree)
        os.makedirs(outdir, exist_ok=True)
        # patchify folder must be robust to 'missing image data' rare cases...
        try:
            patchify_slide(
                slidefile,
                outdir,
                level,
                psize,
                interval,
                offset=offset,
                filters=filters,
                erase_tree=erase_tree,
                thumb_size=thumb_size,
                slide_filters=slide_filters,
                verbose=verbose,
                silence=silence,
                backend=backend,
            )
        except (
            openslide.OpenSlideUnsupportedFormatError,
            openslide.lowlevel.OpenSlideError,
        ) as e:
            warnings.warn(str(e))
|
|
628 |
|
|
|
629 |
|
|
|
630 |
def patchify_folder_hierarchically(
    infolder: PathLike,
    outfolder: PathLike,
    top_level: int,
    low_level: int,
    psize: Coord,
    interval: Coord = (0, 0),
    offset: Coord = (0, 0),
    filters: Optional[FilterList] = None,
    silent: Optional[Sequence[int]] = None,
    extensions: Sequence[str] = (".mrxs",),
    recurse: bool = False,
    folders: Optional[Sequence[str]] = None,
    erase_tree: Optional[bool] = None,
    thumb_size: int = 512,
    slide_filters: Optional[Sequence[Filter]] = None,
    verbose: int = 2,
    backend: str = "openslide",
):
    """
    Save hierarchical patches of all wsi inside a folder.

    Slides that raise an OpenSlide error are skipped with a warning so that a
    single unreadable file does not abort the whole folder.

    Args:
        infolder: abs path to a folder of slides.
        outfolder: abs path to an output folder.
        top_level: top pyramid level to consider.
        low_level: lowest pyramid level to consider.
        psize: (w, h) size of the patches (in pixels).
        interval: (x, y) interval between 2 neighboring patches.
        offset: (x, y) offset in px on x and y axis for patch start.
        filters: filters to accept patches.
        silent: pyramid level not to output.
        extensions: list of file extensions to consider. Defaults to '.mrxs'.
        recurse: whether to look for files recursively.
        folders: list of subfolders to explore when recurse is True. Defaults to all.
        erase_tree: whether to erase outfolder if it exists. If None, user will be
            prompted for a choice.
        thumb_size: size of thumbnail's longest side. Always preserves aspect ratio.
        slide_filters: list of filters to apply to thumbnail. Should output boolean
            mask.
        verbose: 0 => nada, 1 => patchifying parameters, 2 => start-end of processes,
            thumbnail export.
        backend: whether to use openslide or cucim as backend.

    """
    if os.path.isdir(outfolder):
        # Remember the choice so that it is reused for every slide folder.
        erase_tree = safe_rmtree(outfolder, ignore_errors=True, erase_tree=erase_tree)
    slidefiles = get_files(
        infolder, extensions=extensions, recurse=recurse, folders=folders
    ).map(str)
    total = len(slidefiles)
    for k, slidefile in enumerate(slidefiles, start=1):
        if verbose > 0:
            print("slide {} / {}".format(k, total))
        slidename = slide_basename(slidefile)
        outdir = os.path.join(outfolder, slidename)
        if os.path.isdir(outdir):
            safe_rmtree(outdir, ignore_errors=True, erase_tree=erase_tree)
        os.makedirs(outdir, exist_ok=True)
        try:
            patchify_slide_hierarchically(
                slidefile,
                outdir,
                top_level,
                low_level,
                psize,
                interval,
                offset=offset,
                filters=filters,
                silent=silent,
                erase_tree=erase_tree,
                thumb_size=thumb_size,
                slide_filters=slide_filters,
                verbose=verbose,
                backend=backend,
            )
        except (
            openslide.OpenSlideUnsupportedFormatError,
            openslide.lowlevel.OpenSlideError,
        ) as e:
            warnings.warn(str(e))
|
|
710 |
|
|
|
711 |
|
|
|
712 |
def export_floatpred_to_categorical_micromap_json(
    pathaiafolder: PathLike,
    slidefolder: PathLike,
    jsonfolder: PathLike,
    level: int,
    task: str,
    thresholds: Dict[int, Tuple[float, float]],
    classnames: Dict[int, str],
    extensions: Sequence[str] = (".mrxs",),
    recurse: bool = True,
):
    """
    Export pathaia csv to a json annotation file compatible with MicroMap.

    For each predicted category, a layer is created and connected components
    are computed. The sub-folder layout found under ``slidefolder`` is mirrored
    under ``pathaiafolder`` (to locate ``patches.csv``) and under
    ``jsonfolder`` (to write the annotation file). A slide whose export fails
    is reported with a warning and skipped.

    Args:
        pathaiafolder: abs path to a folder of pathaia csv.
        slidefolder: abs path to a folder of slides.
        jsonfolder: abs path to a folder of json annotations.
        level: level of patches to export.
        task: prediction task to export (csv column).
        thresholds: histogram bins for each category.
        classnames: class names for each category.
        extensions: slide extension.
        recurse: whether to look for slides recursively.

    """
    color_dict = {k: colorCycle[k] for k in classnames}
    slidefiles = get_files(slidefolder, extensions=extensions, recurse=recurse).map(str)
    # str.replace only accepts strings, while the folders may be path objects.
    slide_root = os.fspath(slidefolder)
    pathaia_root = os.fspath(pathaiafolder)
    json_root = os.fspath(jsonfolder)
    for slidefile in tqdm(slidefiles):
        slidename, _ = os.path.splitext(os.path.basename(slidefile))
        slidedir = os.path.dirname(slidefile)
        # Swap the root folder only once, so that a sub-folder whose name
        # happens to contain the root is left untouched.
        pathaia_slidedir = slidedir.replace(slide_root, pathaia_root, 1)
        pathaiacsv = os.path.join(pathaia_slidedir, slidename, "patches.csv")
        json_slidedir = slidedir.replace(slide_root, json_root, 1)
        pathaiajson = os.path.join(json_slidedir, "{}.json".format(slidename))
        try:
            # The context manager closes the slide handle even on failure.
            with openslide.OpenSlide(slidefile) as slide:
                gen = gen_categorical_from_floatpreds(
                    pathaiacsv, level, task, thresholds
                )
                layer_edges, layer_meta, interval = get_categorical_layer_edges(
                    gen, color_dict, classnames
                )
                layer_segments = get_categorical_segments_from_edges(layer_edges)
                annotations = layer_segment_to_json_struct(
                    interval, layer_segments, layer_meta, slide
                )
                if json_slidedir:
                    # Mirrored sub-folders may not exist yet on the json side.
                    os.makedirs(json_slidedir, exist_ok=True)
                with open(pathaiajson, "w") as f:
                    json.dump(annotations, f)
        except Exception as e:
            # Deliberately best-effort: report the failing slide and go on.
            warnings.warn(
                "slide '{}'"
                " with patch file '{}'"
                " failed with error: '{}'".format(slidefile, pathaiacsv, str(e))
            )