|
a |
|
b/pathaia/util/paths.py |
|
|
1 |
# coding: utf8 |
|
|
2 |
"""Useful functions for handling patches in WSIs.""" |
|
|
3 |
|
|
|
4 |
import os |
|
|
5 |
import numpy |
|
|
6 |
from pathlib import Path |
|
|
7 |
from fastcore.foundation import L, setify |
|
|
8 |
import shutil |
|
|
9 |
from typing import Sequence, List, Optional, Dict |
|
|
10 |
from .types import PathLike |
|
|
11 |
|
|
|
12 |
|
|
|
13 |
def slides_in_folder(folder: str, extensions: Sequence[str] = (".mrxs",)) -> List[str]:
    """
    Return slide files inside a folder for the given extensions.

    Hidden entries (names starting with a dot) are skipped. Each matching
    entry is returned exactly once, even when its name ends with several of
    the requested extensions (e.g. ``".tif"`` and ``".ome.tif"``).

    Args:
        folder: path to a directory containing slides.
        extensions: file extensions of the slides.

    Returns:
        List of slide paths, each built by joining ``folder`` with the
        entry name, in the order given by ``os.listdir``.

    """
    # str.endswith accepts a tuple of suffixes: one test per name, so a name
    # matching several extensions can no longer be appended several times.
    suffixes = tuple(extensions)
    return [
        os.path.join(folder, name)
        for name in os.listdir(folder)
        if not name.startswith(".") and name.endswith(suffixes)
    ]
|
|
32 |
|
|
|
33 |
|
|
|
34 |
def slide_basename(slidepath: str) -> str:
    """
    Return the file name of a slide without its directory and last extension.

    Args:
        slidepath: path to a slide.

    Returns:
        Final path component of ``slidepath`` with its last extension removed.

    """
    stem, _ = os.path.splitext(os.path.basename(slidepath))
    return stem
|
|
48 |
|
|
|
49 |
|
|
|
50 |
def imfiles_in_folder(
    folder: str,
    authorized: Sequence[str] = (".png", ".jpg", ".jpeg", ".tif", ".tiff"),
    forbiden: Sequence[str] = ("thumbnail",),
    randomize: bool = False,
    datalim: Optional[int] = None,
) -> List[str]:
    """
    List the image files of a folder.

    An entry is kept when its extension (as given by ``os.path.splitext``,
    compared case-sensitively) is in ``authorized`` and none of the
    ``forbiden`` words occurs in its name.

    Args:
        folder: path to an image directory.
        authorized: accepted image file extensions.
        forbiden: words that exclude a file when found in its name.
        randomize: whether to shuffle the resulting list of files.
        datalim: maximum number of files to return (no limit if None).

    Returns:
        Paths of the selected image files, each joined with ``folder``.

    """
    selected = []
    for entry in os.listdir(folder):
        suffix = os.path.splitext(entry)[1]
        if suffix not in authorized:
            continue
        if any(word in entry for word in forbiden):
            continue
        selected.append(os.path.join(folder, entry))

    if randomize:
        # In-place shuffle, performed before truncation so the limit
        # samples from the whole folder.
        numpy.random.shuffle(selected)

    return selected if datalim is None else selected[0:datalim]
|
|
91 |
|
|
|
92 |
|
|
|
93 |
def dataset2folders(
    projfolder: PathLike,
    level: int,
    randomize: bool = False,
    slide_data_lim: Optional[int] = None,
) -> Dict[str, str]:
    """
    Map slide names to their pathaia patch folder for a given level.

    Each sub-directory of the project folder is treated as a slide; it is
    kept only if it contains a ``level_<level>`` directory.

    Args:
        projfolder: path to a pathaia project folder.
        level: pyramid level of patch extraction to consider.
        randomize: whether to shuffle the slides before applying the limit.
        slide_data_lim: maximum number of slides to keep (no limit if None).

    Returns:
        Dictionary mapping slide names to the path of their level folder.

    """
    level_dirname = "level_{}".format(level)
    patch_dirs = {}
    for entry in os.listdir(projfolder):
        slide_dir = os.path.join(projfolder, entry)
        level_dir = os.path.join(slide_dir, level_dirname)
        if os.path.isdir(slide_dir) and os.path.isdir(level_dir):
            patch_dirs[entry] = level_dir

    selected = list(patch_dirs)
    if randomize:
        numpy.random.shuffle(selected)
    if slide_data_lim is not None:
        selected = selected[:slide_data_lim]
    return {name: patch_dirs[name] for name in selected}
|
|
129 |
|
|
|
130 |
|
|
|
131 |
def _get_files(p, fs, extensions=None): |
|
|
132 |
p = Path(p) |
|
|
133 |
res = [ |
|
|
134 |
p / f |
|
|
135 |
for f in fs |
|
|
136 |
if not f.startswith(".") |
|
|
137 |
and ((not extensions) or f'.{f.split(".")[-1].lower()}' in extensions) |
|
|
138 |
] |
|
|
139 |
return res |
|
|
140 |
|
|
|
141 |
|
|
|
142 |
def get_files(
    path: PathLike,
    extensions: Optional[Sequence[str]] = None,
    recurse: bool = True,
    folders: Optional[Sequence[str]] = None,
    followlinks: bool = True,
) -> List[Path]:
    """
    Collect the files of a folder, optionally walking its sub-folders.

    Arguments:
        path: path to input folder.
        extensions: acceptable file extensions (compared in lowercase).
        recurse: whether to perform a recursive search or not.
        folders: direct subfolders to explore (if None explore all).
        followlinks: whether to follow symlinks or not.

    Returns:
        Collection of the paths of all found files, each prefixed by ``path``.
    """
    root = Path(path)
    wanted_dirs = L(folders)
    allowed_exts = {ext.lower() for ext in setify(extensions)}

    if not recurse:
        # Flat listing: only the regular files sitting directly in ``root``.
        names = [entry.name for entry in os.scandir(root) if entry.is_file()]
        return L(_get_files(root, names, allowed_exts))

    found = []
    restrict = len(wanted_dirs) != 0
    # os.walk yields (dirpath, dirnames, filenames); pruning ``dirnames`` in
    # place controls which sub-directories are visited next.
    for step, (dirpath, dirnames, filenames) in enumerate(
        os.walk(root, followlinks=followlinks)
    ):
        if restrict and step == 0:
            # First step is the root folder: descend only into the requested
            # direct sub-folders.
            dirnames[:] = [sub for sub in dirnames if sub in wanted_dirs]
            if "." not in wanted_dirs:
                # Files of the root itself are kept only if "." was requested.
                continue
        else:
            dirnames[:] = [sub for sub in dirnames if not sub.startswith(".")]
        found += _get_files(dirpath, filenames, allowed_exts)
    return L(found)
|
|
182 |
|
|
|
183 |
|
|
|
184 |
def safe_rmtree(
    path: PathLike, ignore_errors: bool = True, erase_tree: Optional[bool] = None
) -> bool:
    """
    Delete a folder tree, asking for confirmation when not told what to do.

    Arguments:
        path: path to folder to be deleted.
        ignore_errors: forwarded to ``shutil.rmtree``.
        erase_tree: whether to remove tree or not. If None, the user is
            prompted until answering "y" or "n" (case-insensitive).

    Returns:
        True if the tree removal was requested (erase_tree is truthy or the
        user answered "y"), False otherwise.
    """
    if erase_tree is None:
        answer = ""
        while answer not in ("y", "n"):
            answer = input(
                "Are you sure you want to delete " f"{path} and all subfolders ? y/n"
            ).lower()
        confirmed = answer == "y"
    else:
        confirmed = bool(erase_tree)

    if not confirmed:
        return False
    shutil.rmtree(path, ignore_errors=ignore_errors)
    return True