[dd9da8]: / code / utils.py

Download this file

146 lines (106 with data), 4.2 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
"""
DeepSlide
General helper methods used in other functions.
Authors: Jason Wei, Behnaz Abdollahi, Saeed Hassanpour
"""
import datetime
from pathlib import Path
from typing import (Dict, List)
# Valid image extensions: lowercase, including the leading dot. The image
# listing helpers in this file compare ``Path.suffix.casefold()`` against
# this list, so matching is case-insensitive.
IMAGE_EXTS = [".jpg", ".jpeg", ".png", ".svs", ".tif", ".tiff"]
def get_classes(folder: Path) -> List[str]:
    """
    List the class names found directly under a folder.

    Every immediate subdirectory of ``folder`` counts as one class.

    Args:
        folder: Folder containing the subfolders named by class.

    Returns:
        The subdirectory names, sorted as strings. Entries whose name
        contains ".DS_Store" are left out.
    """
    class_names = []
    for entry in folder.iterdir():
        candidate = folder.joinpath(entry.name)
        if not candidate.is_dir():
            continue
        if ".DS_Store" in entry.name:
            continue
        class_names.append(entry.name)
    class_names.sort(key=str)
    return class_names
def get_log_csv_name(log_folder: Path) -> Path:
    """
    Find the name of the CSV file for logging.

    The filename has the form ``log_MMDDYYYY_HHMMSS.csv`` and is built from
    the current local time. Every field is zero-padded: without padding,
    different moments can map to the same name (for example January 11 and
    November 1 both render as "111"), which could make one run overwrite
    another run's log.

    Args:
        log_folder: Folder to save logging CSV file in.

    Returns:
        The path including the filename of the logging CSV file with date
        information.
    """
    now = datetime.datetime.now()
    # strftime pads each field to a fixed width, keeping names unambiguous.
    return log_folder.joinpath(now.strftime("log_%m%d%Y_%H%M%S.csv"))
def get_image_names(folder: Path) -> List[Path]:
    """
    Collect the file names of the images stored directly in a folder.

    Args:
        folder: Folder containing images (assume folder only contains images).

    Returns:
        A list of ``Path`` objects holding only the file name (no directory
        part) of each regular file whose suffix, case-folded, is listed in
        ``IMAGE_EXTS``. Names containing ".DS_Store" are skipped. The list is
        sorted by string value.
    """
    image_names = []
    for entry in folder.iterdir():
        if not folder.joinpath(entry.name).is_file():
            continue
        if ".DS_Store" in entry.name:
            continue
        if entry.suffix.casefold() in IMAGE_EXTS:
            image_names.append(Path(entry.name))
    image_names.sort(key=str)
    return image_names
def get_image_paths(folder: Path) -> List[Path]:
    """
    Collect the paths of the images stored directly in a folder.

    Args:
        folder: Folder containing images (assume folder only contains images).

    Returns:
        A list with ``folder`` joined to the name of each regular file whose
        suffix, case-folded, is listed in ``IMAGE_EXTS``. Names containing
        ".DS_Store" are skipped. The list is sorted by string value.
    """
    image_paths = []
    for entry in folder.iterdir():
        full_path = folder.joinpath(entry.name)
        if not full_path.is_file():
            continue
        if ".DS_Store" in entry.name:
            continue
        if entry.suffix.casefold() in IMAGE_EXTS:
            image_paths.append(full_path)
    image_paths.sort(key=str)
    return image_paths
def get_subfolder_paths(folder: Path) -> List[Path]:
    """
    Collect the paths of the immediate subfolders of a folder.

    Args:
        folder: Folder to look for subfolders in.

    Returns:
        A list with ``folder`` joined to the name of each subdirectory,
        sorted by string value. Entries whose name contains ".DS_Store" are
        left out.
    """
    subfolder_paths = []
    for entry in folder.iterdir():
        candidate = folder.joinpath(entry.name)
        if not candidate.is_dir():
            continue
        if ".DS_Store" in entry.name:
            continue
        subfolder_paths.append(candidate)
    subfolder_paths.sort(key=str)
    return subfolder_paths
def get_all_image_paths(master_folder: Path) -> List[Path]:
    """
    Gather image paths from a root folder or from its subfolders.

    When the root has more than one subfolder, the images of every subfolder
    are concatenated in subfolder order. Otherwise the images are read from
    the root folder itself.

    Args:
        master_folder: Root folder containing subfolders.

    Returns:
        A list of the paths to the images found in the folder.
    """
    subdirs = get_subfolder_paths(folder=master_folder)

    # NOTE(review): with exactly one subfolder the root itself is scanned and
    # that subfolder is not descended into - confirm this is intended.
    if len(subdirs) <= 1:
        return get_image_paths(folder=master_folder)

    collected = []
    for subdir in subdirs:
        collected.extend(get_image_paths(folder=subdir))
    return collected
def get_csv_paths(folder: Path) -> List[Path]:
    """
    Find the CSV files contained in a folder.

    A file counts as a CSV file when its suffix is ".csv", compared
    case-insensitively. The file extension is checked rather than looking for
    the substring "csv" anywhere in the name, so files such as
    "csv_notes.txt" or "data.csv.bak" are not picked up.

    Args:
        folder: Folder to search for CSV files.

    Returns:
        A list of the paths to the CSV files in the folder, sorted by string
        value. Directories and names containing ".DS_Store" are left out.
    """
    csv_paths = [
        folder.joinpath(entry.name)
        for entry in folder.iterdir()
        if entry.is_file()
        and entry.suffix.casefold() == ".csv"
        and ".DS_Store" not in entry.name
    ]
    return sorted(csv_paths, key=str)
def create_labels(csv_path: Path) -> Dict[str, str]:
    """
    Read the labels from a CSV file.

    The first line is treated as a header and skipped. Each remaining line is
    split on commas; the first field is the filename and the second field is
    its label. Lines shorter than three characters (for example blank lines)
    are ignored.

    Args:
        csv_path: Path to the CSV file.

    Returns:
        A dictionary mapping string filenames to string labels.

    Raises:
        IndexError: If a line of three or more characters contains no comma.
    """
    file_to_gt_label = {}
    with csv_path.open(mode="r") as csv_file:
        # Skip the header row; the default keeps an empty file from raising.
        next(csv_file, None)
        for line in csv_file:
            # Remove only the line terminator. Blindly dropping the last
            # character would truncate the label on a final line that has no
            # trailing newline.
            row = line.rstrip("\n")
            # The shortest meaningful row is "a,b"; skip anything shorter.
            if len(row) < 3:
                continue
            pieces = row.split(",")
            filename = pieces[0]
            gt_label = pieces[1]
            file_to_gt_label[filename] = gt_label
    return file_to_gt_label