Switch to unified view

a b/pathaia/patches/functional_api.py
1
# coding: utf8
2
"""
3
A module to extract patches in a slide.
4
5
Enable filtering on tissue surface ratio.
6
Draft for hierarchical patch extraction and representation is proposed.
7
"""
8
import warnings
9
import numpy
10
import openslide
11
from ..util.paths import slides_in_folder, slide_basename, safe_rmtree, get_files
12
from ..util.images import regular_grid, get_coords_from_mask
13
from ..util.basic import ifnone
14
from ..util.types import (
15
    Filter,
16
    FilterList,
17
    PathLike,
18
    NDImage,
19
    NDBoolMask,
20
    Coord,
21
    Patch,
22
    Slide,
23
)
24
from .visu import preview_from_queries
25
from .filters import (
26
    filter_hasdapi,
27
    filter_has_significant_dapi,
28
    filter_has_tissue_he,
29
    standardize_filters,
30
)
31
from .slide_filters import filter_thumbnail, filter_fluo_thumbnail
32
from .compat import convert_coords
33
import os
34
import csv
35
from skimage.io import imsave
36
from tqdm import tqdm
37
from .errors import UnknownFilterError, InvalidArgument
38
from typing import Optional, Sequence, Tuple, Iterator, Dict
39
from ..util.convert import (
40
    get_categorical_segments_from_edges,
41
    get_categorical_layer_edges,
42
    layer_segment_to_json_struct,
43
    gen_categorical_from_floatpreds,
44
    colorCycle,
45
)
46
import json
47
from fastcore.basics import listify
48
49
50
# Patch-level filters that can be requested by name (see ``filter_image``).
izi_filters = {
    "has-dapi": filter_hasdapi,
    "has-significant-dapi": filter_has_significant_dapi,
    "has-tissue-he": filter_has_tissue_he,
}

# Thumbnail-level filters that can be requested by name
# (see ``apply_slide_filters``).
slide_filters = {
    "full": filter_thumbnail,
    "fluo": filter_fluo_thumbnail,
}
57
58
59
def filter_image(image: NDImage, filters: Sequence[Filter]) -> bool:
    """
    Check multiple filters on an image.

    Filters are evaluated in order and evaluation stops at the first filter
    that rejects the image.

    Args:
        image: the patch to be filtered.
        filters: functions that turn images into booleans, or names of filters
            registered in ``izi_filters``.

    Returns:
        Whether the image has passed every filter (True when there is no filter).

    Raises:
        UnknownFilterError: if a filter is neither a callable nor a name
            registered in ``izi_filters``.

    """
    for filt in filters:
        if not callable(filt):
            # isinstance (instead of an exact type comparison) also accepts
            # str subclasses, consistently with apply_slide_filters.
            if not isinstance(filt, str) or filt not in izi_filters:
                raise UnknownFilterError("{} is not a valid filter !!!".format(filt))
            filt = izi_filters[filt]
        if not filt(image):
            return False
    return True
83
84
85
def apply_slide_filters(thumb: NDImage, filters: Sequence[Filter]) -> NDBoolMask:
    """
    Combine the masks produced by every filter on a thumbnail (logical and).

    Args:
        thumb: thumbnail to compute mask from.
        filters: filters to apply to thumb, given either as callables or as
            names registered in the module-level ``slide_filters`` mapping.
            Each filter should output a boolean mask with same dimensions as
            thumb.

    Returns:
        Boolean mask where tissue pixels are True. With no filter, every pixel
        is True.

    Raises:
        UnknownFilterError: if a filter name is not registered.

    """
    # Start from an all-True mask covering the first two axes of the thumbnail.
    combined = numpy.ones(thumb.shape[:2], dtype=bool)
    for candidate in filters:
        named = isinstance(candidate, str)
        if named and candidate not in slide_filters:
            raise UnknownFilterError(
                "{} is not a valid filter !!!".format(candidate)
            )
        mask_fn = slide_filters[candidate] if named else candidate
        combined = combined & mask_fn(thumb)
    return combined
106
107
108
def slide_rois(
    slide: Slide,
    level: int,
    psize: Coord,
    interval: Coord = (0, 0),
    ancestors: Optional[Sequence[Patch]] = None,
    offset: Coord = (0, 0),
    filters: Optional[Sequence[Filter]] = None,
    thumb_size: int = 512,
    slide_filters: Optional[Sequence[Filter]] = None,
) -> Iterator[Tuple[Patch, NDImage]]:
    """
    Iterate over the accepted patches of a slide together with their pixels.

    Without ancestors, candidate positions come from the thumbnail mask of the
    whole slide and are shifted by ``offset``. With ancestors, a regular grid
    is laid inside each ancestor patch instead; ``offset``, ``thumb_size`` and
    ``slide_filters`` are not used in that case. Regions that cannot be read
    (``OpenSlideError``) are reported on stdout and skipped.

    Args:
        slide: the slide to patchify.
        level: pyramid level.
        psize: (w, h) size of the patches (in pixels).
        interval: (x, y) interval between 2 neighboring patches.
        ancestors: patches that contain upcoming patches.
        offset: (x, y) offset in px on x and y axis for patch start.
        filters: filters to accept patches.
        thumb_size: size of thumbnail's longest side. Always preserves aspect ratio.
        slide_filters: list of filters to apply to thumbnail. Should output boolean mask.

    Yields:
        A tuple containing a Patch object and the corresponding image as
        ndarray (first three channels only).

    """
    psize = convert_coords(psize)
    offset = convert_coords(offset)
    ancestors = ifnone(ancestors, [])
    filters = listify(filters)
    slide_filters = listify(slide_filters)

    if len(ancestors) == 0:
        # Whole-slide mode: positions are driven by the thumbnail mask.
        shape = Coord(*slide.level_dimensions[level])
        mag = slide.level_downsamples[level]
        thumb = numpy.array(slide.get_thumbnail((thumb_size, thumb_size)))
        mask = apply_slide_filters(thumb, slide_filters)
        grid = get_coords_from_mask(mask, shape, interval, psize)
        for rank, patch_coord in enumerate(grid, start=1):
            position = patch_coord * mag + offset
            size_0 = psize * mag
            try:
                region = slide.read_region(position, level, psize)
                image = numpy.array(region)[:, :, 0:3]
                if filter_image(image, filters):
                    yield Patch(
                        id="#{}".format(rank),
                        slidename=slide._filename.split("/")[-1],
                        position=position,
                        level=level,
                        size=psize,
                        size_0=size_0,
                    ), image
            except openslide.lowlevel.OpenSlideError:
                print(
                    "small failure while reading tile x={}, y={} in {}".format(
                        *position, slide._filename
                    )
                )
        return

    # Hierarchical mode: children are numbered from 1 inside each ancestor and
    # their id is "<ancestor id>#<rank>".
    mag = slide.level_downsamples[level]
    shape = Coord(ancestors[0].size_0) / mag
    size_0 = psize * mag
    patches = [
        Patch(
            id="{}#{}".format(ancestor.id, rank),
            slidename=slide._filename.split("/")[-1],
            position=patch_coord * mag + ancestor.position,
            level=level,
            size=psize,
            size_0=size_0,
            parent=ancestor,
        )
        for ancestor in ancestors
        for rank, patch_coord in enumerate(
            regular_grid(shape, interval, psize), start=1
        )
    ]
    for patch in tqdm(patches, ascii=True):
        try:
            region = slide.read_region(patch.position, patch.level, patch.size)
            image = numpy.array(region)[:, :, 0:3]
            if filter_image(image, filters):
                yield patch, image
        except openslide.lowlevel.OpenSlideError:
            print(
                "small failure while reading tile x={}, y={} in {}".format(
                    *patch.position, slide._filename
                )
            )
211
212
213
def slide_rois_no_image(
    slide: Slide,
    level: int,
    psize: Coord,
    interval: Coord = (0, 0),
    ancestors: Optional[Sequence[Patch]] = None,
    offset: Coord = (0, 0),
    thumb_size: int = 512,
    slide_filters: Optional[Sequence[Filter]] = None,
) -> Iterator[Patch]:
    """
    Iterate over the patches of a slide without reading any pixel.

    Same patch layout as ``slide_rois`` (thumbnail mask on the whole slide, or
    a regular grid inside each ancestor), but no region is read and no
    patch-level filter is applied.

    Args:
        slide: the slide to patchify.
        level: pyramid level.
        psize: (w, h) size of the patches (in pixels).
        interval: (x, y) interval between 2 neighboring patches.
        ancestors: patches that contain upcoming patches.
        offset: (x, y) offset in px on x and y axis for patch start. Only used
            when there is no ancestor.
        thumb_size: size of thumbnail's longest side. Always preserves aspect ratio.
        slide_filters: list of filters to apply to thumbnail. Should output boolean mask.

    Yields:
        Patch objects (no image is attached).

    """
    psize = convert_coords(psize)
    offset = convert_coords(offset)
    ancestors = ifnone(ancestors, [])
    slide_filters = listify(slide_filters)

    if len(ancestors) == 0:
        # Whole-slide mode: positions are driven by the thumbnail mask.
        shape = Coord(*slide.level_dimensions[level])
        mag = slide.level_downsamples[level]
        thumb = numpy.array(slide.get_thumbnail((thumb_size, thumb_size)))
        mask = apply_slide_filters(thumb, slide_filters)
        grid = get_coords_from_mask(mask, shape, interval, psize)
        for rank, patch_coord in enumerate(grid, start=1):
            position = patch_coord * mag + offset
            size_0 = psize * mag
            yield Patch(
                id="#{}".format(rank),
                slidename=slide._filename.split("/")[-1],
                position=position,
                level=level,
                size=psize,
                size_0=size_0,
            )
        return

    # Hierarchical mode: one regular grid per ancestor, ranks restart at 1.
    mag = slide.level_downsamples[level]
    shape = Coord(ancestors[0].size_0) / mag
    size_0 = psize * mag
    for ancestor in ancestors:
        prefix = ancestor.id
        children = regular_grid(shape, interval, psize)
        for rank, patch_coord in enumerate(children, start=1):
            yield Patch(
                id="{}#{}".format(prefix, rank),
                slidename=slide._filename.split("/")[-1],
                position=patch_coord * mag + ancestor.position,
                level=level,
                size=psize,
                size_0=size_0,
                parent=ancestor,
            )
289
290
291
def patchify_slide(
    slidefile: PathLike,
    outdir: PathLike,
    level: int,
    psize: Coord,
    interval: Coord = (0, 0),
    offset: Coord = (0, 0),
    filters: Optional[Sequence[Filter]] = None,
    erase_tree: Optional[bool] = None,
    thumb_size: int = 512,
    slide_filters: Optional[Sequence[Filter]] = None,
    verbose: int = 2,
    silence: int = 0,
    backend: str = "openslide",
):
    """
    Save patches of a given wsi.

    Patches are written as ``<x>_<y>_<level>.png`` in a ``level_<level>``
    subfolder of the slide output folder, and their metadata is written to
    ``patches.csv`` in the slide output folder.

    Args:
        slidefile: abs path to slide file.
        outdir: abs path to an output folder.
        level: pyramid level.
        psize: (w, h) size of the patches (in pixels).
        interval: (x, y) interval between 2 neighboring patches.
        offset: (x, y) offset in px on x and y axis for patch start.
        filters: filters to accept patches.
        erase_tree: whether to erase outfolder if it exists. If None, user will be
            prompted for a choice.
        thumb_size: size of thumbnail's longest side. Always preserves aspect ratio.
        slide_filters: list of filters to apply to thumbnail. Should output boolean
            mask.
        verbose: 0 => nada, 1 => name of the slide being patchified, 2 =>
            patchifying parameters, start-end of processes and thumbnail export.
        silence: 0 => write images, 1 => use images, but do not write, 2 => not even
            using images for filtering.
        backend: whether to use openslide or cucim as backend.

    Raises:
        InvalidArgument: if ``silence`` is not one of 0, 1 or 2.

    """
    # Reject an invalid mode before any output folder gets erased or created.
    if silence not in (0, 1, 2):
        raise InvalidArgument(
            "Invalid 'silence' parameter: '{}',"
            " should be one of [0, 1, 2]".format(silence)
        )
    # Get name of the slide
    slide_id = slide_basename(slidefile)
    # if output directory has the same name, it's ok
    if os.path.basename(outdir) == slide_id:
        slide_folder_output = outdir
    # otherwise, create a subfolder with the name of the slide
    else:
        slide_folder_output = os.path.join(outdir, slide_id)
        if os.path.isdir(slide_folder_output):
            # keep the answer so that the choice is reused for the level folder
            erase_tree = safe_rmtree(
                slide_folder_output, ignore_errors=True, erase_tree=erase_tree
            )
        os.makedirs(slide_folder_output, exist_ok=True)

    if verbose > 0:
        print("patchifying: {}".format(slidefile))
        if verbose > 1:
            print("level: {}".format(level))
            print("patch-size: {}".format(psize))
            print("interval: {}".format(interval))
            print("offset: {}".format(offset))
            print("filtering: {}".format(filters))
            print("starting patchification...")
    slide = Slide(slidefile, backend=backend)
    plist = []
    # level directory
    outleveldir = os.path.join(slide_folder_output, "level_{}".format(level))
    if os.path.isdir(outleveldir):
        safe_rmtree(outleveldir, ignore_errors=True, erase_tree=erase_tree)
    os.makedirs(outleveldir, exist_ok=True)
    ########################
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        if silence == 2:
            # Do not write images and do not even use them to filter patches
            plist.extend(
                slide_rois_no_image(
                    slide,
                    level,
                    psize,
                    interval,
                    offset=offset,
                    thumb_size=thumb_size,
                    slide_filters=slide_filters,
                )
            )
        else:
            # Images are always used for filtering; they are only written
            # to disk when silence == 0.
            write_images = silence == 0
            for patch, img in slide_rois(
                slide,
                level,
                psize,
                interval,
                offset=offset,
                filters=filters,
                thumb_size=thumb_size,
                slide_filters=slide_filters,
            ):
                if write_images:
                    outfile = os.path.join(
                        outleveldir,
                        "{}_{}_{}.png".format(*patch.position, patch.level),
                    )
                    imsave(outfile, img)
                plist.append(patch)
    if verbose > 1:
        print("end of patchification.")
        print("starting metadata csv export...")
    csv_columns = Patch.get_fields()
    csv_path = os.path.join(slide_folder_output, "patches.csv")
    # newline="" lets the csv module control line endings (see csv docs).
    with open(csv_path, "w", newline="") as csvfile:
        writer = csv.DictWriter(csvfile, csv_columns)
        writer.writeheader()
        writer.writerows(map(Patch.to_csv_row, plist))
    if verbose > 1:
        print("end of metadata export.")
        print("starting thumbnail export...")
        out_thumbnailfile = os.path.join(outleveldir, "thumbnail.png")
        thumbnail = preview_from_queries(slide, plist)
        imsave(out_thumbnailfile, thumbnail)
        print("ending thumbnail export.")
425
426
427
def patchify_slide_hierarchically(
    slidefile: PathLike,
    outdir: PathLike,
    top_level: int,
    low_level: int,
    psize: Coord,
    interval: Coord = (0, 0),
    offset: Coord = (0, 0),
    filters: Optional[FilterList] = None,
    silent: Optional[Sequence[int]] = None,
    erase_tree: Optional[bool] = None,
    thumb_size: int = 512,
    slide_filters: Optional[Sequence[Filter]] = None,
    verbose: int = 2,
    backend: str = "openslide",
):
    """
    Save patches of a given wsi in a hierarchical way.

    Levels are processed from ``top_level`` down to ``low_level``; the patches
    accepted at one level are the ancestors used to lay out the next one. If a
    level keeps no patch, the lower levels are not processed.

    Args:
        slidefile: abs path to a slide file.
        outdir: abs path to an output folder.
        top_level: top pyramid level to consider.
        low_level: lowest pyramid level to consider.
        psize: (w, h) size of the patches (in pixels).
        interval: (x, y) interval between 2 neighboring patches.
        offset: (x, y) offset in px on x and y axis for patch start.
        filters: filters to accept patches.
        silent: pyramid level not to output.
        erase_tree: whether to erase outfolder if it exists. If None, user will be
            prompted for a choice.
        thumb_size: size of thumbnail's longest side. Always preserves aspect ratio.
        slide_filters: list of filters to apply to thumbnail. Should output boolean
            mask.
        verbose: 0 => nada, 1 => name of the slide being patchified, 2 =>
            patchifying parameters, start-end of processes and thumbnail export.
        backend: whether to use openslide or cucim as backend.

    """
    filters = ifnone(filters, {})
    silent = ifnone(silent, [])
    level_filters = standardize_filters(filters, top_level, low_level)
    # Get name of the slide
    slide_id = slide_basename(slidefile)
    # if output directory has the same name, it's ok
    if os.path.basename(outdir) == slide_id:
        slide_folder_output = outdir
    # otherwise, create a subfolder with the name of the slide
    else:
        slide_folder_output = os.path.join(outdir, slide_id)
        if os.path.isdir(slide_folder_output):
            erase_tree = safe_rmtree(
                slide_folder_output, ignore_errors=True, erase_tree=erase_tree
            )
        os.makedirs(slide_folder_output, exist_ok=True)

    csv_columns = Patch.get_fields()
    csv_path = os.path.join(slide_folder_output, "patches.csv")

    slide = Slide(slidefile, backend=backend)

    # newline="" lets the csv module control line endings (see csv docs).
    with open(csv_path, "w", newline="") as csvfile:
        writer = csv.DictWriter(csvfile, csv_columns)
        writer.writeheader()
        plist = []
        for level in range(top_level, low_level - 1, -1):
            # With no surviving ancestor, slide_rois would fall back to
            # patchifying the whole slide at this level, which defeats the
            # hierarchy: stop descending instead.
            if level != top_level and not plist:
                if verbose > 0:
                    print(
                        "no patch kept at level {}, skipping lower levels.".format(
                            level + 1
                        )
                    )
                break
            if verbose > 0:
                print("patchifying: {}".format(slidefile))
                if verbose > 1:
                    print("level: {}".format(level))
                    print("patch-size: {}".format(psize))
                    print("interval: {}".format(interval))
                    print("offset: {}".format(offset))
                    print("filtering: {}".format(level_filters[level]))
                    print("ancestors: {} patches".format(len(plist)))
                    print("starting patchification...")
            current_plist = []
            # level directory
            outleveldir = os.path.join(slide_folder_output, "level_{}".format(level))
            if os.path.isdir(outleveldir):
                safe_rmtree(outleveldir, ignore_errors=True, erase_tree=erase_tree)
            os.makedirs(outleveldir, exist_ok=True)
            ########################
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                for patch, img in slide_rois(
                    slide,
                    level,
                    psize,
                    interval,
                    ancestors=plist,
                    offset=offset,
                    filters=level_filters[level],
                    thumb_size=thumb_size,
                    slide_filters=slide_filters,
                ):
                    # silent levels are still computed (they provide the
                    # ancestors of the next level) but their images are not
                    # written
                    if level not in silent:
                        outfile = os.path.join(
                            outleveldir,
                            "{}_{}_{}.png".format(*patch.position, patch.level),
                        )
                        imsave(outfile, img)
                    current_plist.append(patch)
            # patches of this level become the ancestors of the next one
            plist = current_plist
            if verbose > 1:
                print("end of patchification.")
                print("starting metadata csv export...")
            writer.writerows(map(Patch.to_csv_row, plist))
            if verbose > 1:
                print("end of metadata export.")
                print("starting thumbnail export...")
                out_thumbnailfile = os.path.join(outleveldir, "thumbnail.png")
                thumbnail = preview_from_queries(slide, current_plist)
                imsave(out_thumbnailfile, thumbnail)
                print("ending thumbnail export.")
543
544
545
def patchify_folder(
    infolder: str,
    outfolder: str,
    level: int,
    psize: Coord,
    interval: Coord = (0, 0),
    offset: Coord = (0, 0),
    filters: Optional[Sequence[Filter]] = None,
    extensions: Sequence[str] = (".mrxs",),
    recurse: bool = False,
    folders: Optional[Sequence[str]] = None,
    erase_tree: Optional[bool] = None,
    thumb_size: int = 512,
    slide_filters: Optional[Sequence[Filter]] = None,
    verbose: int = 2,
    silence: int = 0,
    backend: str = "openslide",
):
    """
    Save patches of all wsi inside a folder.

    Each slide is patchified with ``patchify_slide`` into a subfolder of
    ``outfolder`` named after the slide. Slides raising an openslide error are
    reported with a warning and skipped.

    Args:
        infolder: abs path to a folder of slides.
        outfolder: abs path to an output folder.
        level: pyramid level.
        psize: (w, h) size of the patches (in pixels).
        interval: (x, y) interval between 2 neighboring patches.
        offset: (x, y) offset in px on x and y axis for patch start.
        filters: filters to accept patches.
        extensions: list of file extensions to consider. Defaults to '.mrxs'.
        recurse: whether to look for files recursively.
        folders: list of subfolders to explore when recurse is True. Defaults to all.
        erase_tree: whether to erase outfolder if it exists. If None, user will be
            prompted for a choice.
        thumb_size: size of thumbnail's longest side. Always preserves aspect ratio.
        slide_filters: list of filters to apply to thumbnail. Should output boolean
            mask.
        verbose: 0 => nada, 1 => slide counter and name, 2 => patchifying
            parameters, start-end of processes and thumbnail export.
        silence: 0 => write images, 1 => use images, but do not write, 2 => not even
            using images for filtering.
        backend: whether to use openslide or cucim as backend.

    """
    if os.path.isdir(outfolder):
        erase_tree = safe_rmtree(outfolder, ignore_errors=True, erase_tree=erase_tree)
    # The listing honours extensions / recurse / folders; it must not be
    # overridden by another listing of infolder.
    slidefiles = get_files(
        infolder, extensions=extensions, recurse=recurse, folders=folders
    ).map(str)
    total = len(slidefiles)
    for k, slidefile in enumerate(slidefiles, start=1):
        if verbose > 0:
            print("slide {} / {}".format(k, total))
        slidename = slide_basename(slidefile)
        outdir = os.path.join(outfolder, slidename)
        if os.path.isdir(outdir):
            safe_rmtree(outdir, ignore_errors=True, erase_tree=erase_tree)
        os.makedirs(outdir, exist_ok=True)
        # patchify folder must be robust to 'missing image data' rare cases...
        try:
            patchify_slide(
                slidefile,
                outdir,
                level,
                psize,
                interval,
                offset=offset,
                filters=filters,
                erase_tree=erase_tree,
                thumb_size=thumb_size,
                slide_filters=slide_filters,
                verbose=verbose,
                silence=silence,
                backend=backend,
            )
        except (
            openslide.OpenSlideUnsupportedFormatError,
            openslide.lowlevel.OpenSlideError,
        ) as e:
            warnings.warn(str(e))
628
629
630
def patchify_folder_hierarchically(
    infolder: PathLike,
    outfolder: PathLike,
    top_level: int,
    low_level: int,
    psize: Coord,
    interval: Coord = (0, 0),
    offset: Coord = (0, 0),
    filters: Optional[FilterList] = None,
    silent: Optional[Sequence[int]] = None,
    extensions: Sequence[str] = (".mrxs",),
    recurse: bool = False,
    folders: Optional[Sequence[str]] = None,
    erase_tree: Optional[bool] = None,
    thumb_size: int = 512,
    slide_filters: Optional[Sequence[Filter]] = None,
    verbose: int = 2,
    backend: str = "openslide",
):
    """
    Save hierarchical patches of all wsi inside a folder.

    Each slide found in ``infolder`` is handed to
    ``patchify_slide_hierarchically`` with an output subfolder of ``outfolder``
    named after the slide. Slides raising an openslide error are reported with
    a warning and skipped.

    Args:
        infolder: abs path to a folder of slides.
        outfolder: abs path to an output folder.
        top_level: top pyramid level to consider.
        low_level: lowest pyramid level to consider.
        psize: (w, h) size of the patches (in pixels).
        interval: (x, y) interval between 2 neighboring patches.
        offset: (x, y) offset in px on x and y axis for patch start.
        filters: filters to accept patches.
        silent: pyramid level not to output.
        extensions: list of file extensions to consider. Defaults to '.mrxs'.
        recurse: whether to look for files recursively.
        folders: list of subfolders to explore when recurse is True. Defaults to all.
        erase_tree: whether to erase outfolder if it exists. If None, user will be
            prompted for a choice.
        thumb_size: size of thumbnail's longest side. Always preserves aspect ratio.
        slide_filters: list of filters to apply to thumbnail. Should output boolean
            mask.
        verbose: 0 => nada, 1 => patchifying parameters, 2 => start-end of processes,
            thumbnail export.
        backend: whether to use openslide or cucim as backend.

    """
    if os.path.isdir(outfolder):
        # remember the answer so that it is reused for every slide below
        erase_tree = safe_rmtree(outfolder, ignore_errors=True, erase_tree=erase_tree)
    found = get_files(
        infolder, extensions=extensions, recurse=recurse, folders=folders
    )
    slidefiles = found.map(str)
    total = len(slidefiles)
    for rank, slidefile in enumerate(slidefiles, start=1):
        if verbose > 0:
            print("slide {} / {}".format(rank, total))
        outdir = os.path.join(outfolder, slide_basename(slidefile))
        if os.path.isdir(outdir):
            safe_rmtree(outdir, ignore_errors=True, erase_tree=erase_tree)
        os.makedirs(outdir, exist_ok=True)
        recoverable = (
            openslide.OpenSlideUnsupportedFormatError,
            openslide.lowlevel.OpenSlideError,
        )
        try:
            patchify_slide_hierarchically(
                slidefile,
                outdir,
                top_level,
                low_level,
                psize,
                interval,
                offset=offset,
                filters=filters,
                silent=silent,
                erase_tree=erase_tree,
                thumb_size=thumb_size,
                slide_filters=slide_filters,
                verbose=verbose,
                backend=backend,
            )
        except recoverable as e:
            warnings.warn(str(e))
710
711
712
def export_floatpred_to_categorical_micromap_json(
    pathaiafolder: PathLike,
    slidefolder: PathLike,
    jsonfolder: PathLike,
    level: int,
    task: str,
    thresholds: Dict[int, Tuple[float, float]],
    classnames: Dict[int, str],
    extensions: Sequence[str] = (".mrxs",),
    recurse: bool = True,
):
    """
    Export pathaia csv to a json annotation file compatible with MicroMap.

    For each predicted category, a layer is created and connected components
    are computed. The layout of ``slidefolder`` is mirrored: a slide found in
    ``<slidefolder>/<sub>/<name>.<ext>`` is read together with
    ``<pathaiafolder>/<sub>/<name>/patches.csv`` and exported to
    ``<jsonfolder>/<sub>/<name>.json``. A slide whose export fails is reported
    with a warning and skipped.

    Args:
        pathaiafolder: abs path to a folder of pathaia csv.
        slidefolder: abs path to a folder of slides.
        jsonfolder: abs path to a folder of json annotations.
        level: level of patches to export.
        task: prediction task to export (csv column).
        thresholds: histogram bins for each category.
        classnames: class names for each category.
        extensions: slide extension.
        recurse: whether to look for slides recursively in slidefolder.

    """
    color_dict = {k: colorCycle[k] for k in classnames}
    # Plain strings so that os.PathLike arguments are handled as well.
    slide_root = os.fspath(slidefolder)
    pathaia_root = os.fspath(pathaiafolder)
    json_root = os.fspath(jsonfolder)
    slidefiles = get_files(slidefolder, extensions=extensions, recurse=recurse).map(str)
    for slidefile in tqdm(slidefiles):
        slidename, _ = os.path.splitext(os.path.basename(slidefile))
        # Position of the slide inside slidefolder, mirrored in both the
        # pathaia and the json trees.
        rel_dir = os.path.relpath(os.path.dirname(slidefile), slide_root)
        pathaiacsv = os.path.normpath(
            os.path.join(pathaia_root, rel_dir, slidename, "patches.csv")
        )
        json_slidedir = os.path.normpath(os.path.join(json_root, rel_dir))
        pathaiajson = os.path.join(json_slidedir, "{}.json".format(slidename))
        try:
            os.makedirs(json_slidedir, exist_ok=True)
            # the context manager closes the slide handle once it has been used
            with openslide.OpenSlide(slidefile) as slide:
                gen = gen_categorical_from_floatpreds(
                    pathaiacsv, level, task, thresholds
                )
                layer_edges, layer_meta, interval = get_categorical_layer_edges(
                    gen, color_dict, classnames
                )
                layer_segments = get_categorical_segments_from_edges(layer_edges)
                annotations = layer_segment_to_json_struct(
                    interval, layer_segments, layer_meta, slide
                )
            with open(pathaiajson, "w") as f:
                json.dump(annotations, f)
        except Exception as e:
            # best effort: one failing slide must not stop the whole export
            warnings.warn(
                "slide '{}'"
                " with patch file '{}'"
                " failed with error: '{}'".format(slidefile, pathaiacsv, str(e))
            )