a b/pathaia/util/paths.py
1
# coding: utf8
2
"""Useful functions for handling patches in WSIs."""
3
4
import os
5
import numpy
6
from pathlib import Path
7
from fastcore.foundation import L, setify
8
import shutil
9
from typing import Sequence, List, Optional, Dict
10
from .types import PathLike
11
12
13
def slides_in_folder(folder: str, extensions: Sequence[str] = (".mrxs",)) -> List[str]:
    """
    Return slide files inside a folder for the given extensions.

    Entries whose name starts with "." are skipped. Each matching entry is
    returned exactly once, even when its name ends with several of the
    requested extensions (e.g. ".ome.tiff" and ".tiff").

    Args:
        folder: path to a directory containing slides.
        extensions: file name suffixes of the slides to keep.

    Returns:
        List of paths (folder joined with the entry name), in the order
        given by os.listdir.

    """
    # str.endswith only accepts a str or a tuple of str, so normalise any
    # sequence (list, tuple, ...) once, outside the loop.
    suffixes = tuple(extensions)
    return [
        os.path.join(folder, name)
        for name in os.listdir(folder)
        if not name.startswith(".") and name.endswith(suffixes)
    ]
32
33
34
def slide_basename(slidepath: str) -> str:
    """
    Give the basename of a slide from its path.

    The directory part is dropped first, then the last extension is
    removed (only the final ".xxx" suffix is stripped).

    Args:
        slidepath: path to a slide.

    Returns:
        File name of the slide without directory and without extension.

    """
    filename = os.path.basename(slidepath)
    stem = os.path.splitext(filename)[0]
    return stem
48
49
50
def imfiles_in_folder(
    folder: str,
    authorized: Sequence[str] = (".png", ".jpg", ".jpeg", ".tif", ".tiff"),
    forbiden: Sequence[str] = ("thumbnail",),
    randomize: bool = False,
    datalim: Optional[int] = None,
) -> List[str]:
    """
    Get image files in a given folder.

    Files are selected by extension (compared case-insensitively, so
    "scan.JPG" matches ".jpg"). Any file whose name contains one of the
    forbidden words is discarded.

    Args:
        folder: path to an image directory.
        authorized: authorized image file extensions (with leading dot).
        forbiden: non-authorized words in file names.
        randomize: whether to shuffle the output list of files.
        datalim: maximum number of files to return (None for no limit).

    Returns:
        Paths of image files in folder (folder joined with the file name).

    """
    # Lower-case once so the membership test below is case-insensitive,
    # consistent with how get_files compares extensions in this module.
    allowed = {ext.lower() for ext in authorized}

    imfiles = []
    for name in os.listdir(folder):
        ext = os.path.splitext(name)[1].lower()
        if ext not in allowed:
            continue
        # Forbidden words are matched against the name exactly as on disk.
        if any(word in name for word in forbiden):
            continue
        imfiles.append(os.path.join(folder, name))

    if randomize:
        numpy.random.shuffle(imfiles)
    if datalim is not None:
        imfiles = imfiles[:datalim]

    return imfiles
91
92
93
def dataset2folders(
    projfolder: PathLike,
    level: int,
    randomize: bool = False,
    slide_data_lim: Optional[int] = None,
) -> Dict[str, str]:
    """
    Link slidenames to their pathaia patch folder.

    Every sub-directory of the project folder is treated as a slide; it is
    kept only if it contains a directory named "level_<level>".

    Args:
        projfolder: path to a pathaia project folder.
        level: pyramid level of patch extraction to consider.
        randomize: whether to shuffle the order of the selected slides.
        slide_data_lim: maximum number of slides to keep (None for all).

    Returns:
        Dictionary mapping slidenames to the path of their level folder.

    """
    level_dirname = "level_{}".format(level)

    found = {}
    for entry in os.listdir(projfolder):
        slide_dir = os.path.join(projfolder, entry)
        if not os.path.isdir(slide_dir):
            continue
        patch_dir = os.path.join(slide_dir, level_dirname)
        if not os.path.isdir(patch_dir):
            continue
        found[entry] = patch_dir

    selected = list(found)
    if randomize:
        numpy.random.shuffle(selected)
    if slide_data_lim is not None:
        selected = selected[:slide_data_lim]

    result = {}
    for slidename in selected:
        result[slidename] = found[slidename]
    return result
129
130
131
def _get_files(p, fs, extensions=None):
132
    p = Path(p)
133
    res = [
134
        p / f
135
        for f in fs
136
        if not f.startswith(".")
137
        and ((not extensions) or f'.{f.split(".")[-1].lower()}' in extensions)
138
    ]
139
    return res
140
141
142
def get_files(
    path: PathLike,
    extensions: Optional[Sequence[str]] = None,
    recurse: bool = True,
    folders: Optional[Sequence[str]] = None,
    followlinks: bool = True,
) -> List[Path]:
    """
    Find all files in a folder, optionally recursively.

    Arguments:
        path: Path to input folder.
        extensions: acceptable file extensions (compared lower-cased).
        recurse: whether to perform a recursive search or not.
        folders: direct subfolders to explore (if None explore all).
        followlinks: whether os.walk should follow symlinks or not.

    Returns:
        A fastcore L of paths to the found files.
    """
    root = Path(path)
    subfolders = L(folders)
    wanted = {ext.lower() for ext in setify(extensions)}

    if not recurse:
        # Flat listing: only regular files directly inside root.
        names = [entry.name for entry in os.scandir(root) if entry.is_file()]
        return L(_get_files(root, names, wanted))

    restrict_root = len(subfolders) != 0
    found = []
    # os.walk yields (dirpath, dirnames, filenames); assigning to
    # dirnames[:] prunes, in place, the directories it will descend into.
    for depth, (dirpath, dirnames, filenames) in enumerate(
        os.walk(root, followlinks=followlinks)
    ):
        if restrict_root and depth == 0:
            # First iteration is the root: descend only into requested folders.
            dirnames[:] = [name for name in dirnames if name in subfolders]
            # Files directly in the root are collected only if "." was requested.
            if "." not in subfolders:
                continue
        else:
            dirnames[:] = [name for name in dirnames if not name.startswith(".")]
        found += _get_files(dirpath, filenames, wanted)
    return L(found)
182
183
184
def safe_rmtree(
    path: PathLike, ignore_errors: bool = True, erase_tree: Optional[bool] = None
) -> bool:
    """
    Safe version of rmtree that asks for permission before deleting.

    Arguments:
        path: path to folder to be deleted.
        ignore_errors: forwarded to shutil.rmtree.
        erase_tree: whether to remove tree or not. If None, the user is
            prompted on stdin until the answer is "y" or "n".

    Returns:
        True if the tree removal was requested (erase_tree is truthy or the
        user answered "y"), False otherwise.
    """
    if erase_tree is None:
        # Keep prompting until an unambiguous answer is given.
        answer = ""
        while answer not in ("y", "n"):
            answer = input(
                f"Are you sure you want to delete {path} and all subfolders ? y/n"
            ).lower()
        confirmed = answer == "y"
    else:
        confirmed = bool(erase_tree)

    if not confirmed:
        return False

    shutil.rmtree(path, ignore_errors=ignore_errors)
    return True