utils/segment/dataloaders.py
|
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
"""
Dataloaders and dataset utilities for YOLOv5 segmentation
"""

import os
import random

import cv2
import numpy as np
import torch
from torch.utils.data import DataLoader, distributed

from ..augmentations import augment_hsv, copy_paste, letterbox
from ..dataloaders import InfiniteDataLoader, LoadImagesAndLabels, SmartDistributedSampler, seed_worker
from ..general import LOGGER, xyn2xy, xywhn2xyxy, xyxy2xywhn
from ..torch_utils import torch_distributed_zero_first
from .augmentations import mixup, random_perspective

RANK = int(os.getenv('RANK', -1))
|
|
def create_dataloader(path,
                      imgsz,
                      batch_size,
                      stride,
                      single_cls=False,
                      hyp=None,
                      augment=False,
                      cache=False,
                      pad=0.0,
                      rect=False,
                      rank=-1,
                      workers=8,
                      image_weights=False,
                      quad=False,
                      prefix='',
                      shuffle=False,
                      mask_downsample_ratio=1,
                      overlap_mask=False,
                      seed=0):
    if rect and shuffle:
        LOGGER.warning('WARNING ⚠️ --rect is incompatible with DataLoader shuffle, setting shuffle=False')
        shuffle = False
    with torch_distributed_zero_first(rank):  # init dataset *.cache only once if DDP
        dataset = LoadImagesAndLabelsAndMasks(
            path,
            imgsz,
            batch_size,
            augment=augment,  # augmentation
            hyp=hyp,  # hyperparameters
            rect=rect,  # rectangular batches
            cache_images=cache,
            single_cls=single_cls,
            stride=int(stride),
            pad=pad,
            image_weights=image_weights,
            prefix=prefix,
            downsample_ratio=mask_downsample_ratio,
            overlap=overlap_mask,
            rank=rank)

    batch_size = min(batch_size, len(dataset))
    nd = torch.cuda.device_count()  # number of CUDA devices
    nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers])  # number of workers
    sampler = None if rank == -1 else SmartDistributedSampler(dataset, shuffle=shuffle)
    loader = DataLoader if image_weights else InfiniteDataLoader  # only DataLoader allows for attribute updates
    generator = torch.Generator()
    generator.manual_seed(6148914691236517205 + seed + RANK)
    return loader(
        dataset,
        batch_size=batch_size,
        shuffle=shuffle and sampler is None,
        num_workers=nw,
        sampler=sampler,
        pin_memory=True,
        collate_fn=LoadImagesAndLabelsAndMasks.collate_fn4 if quad else LoadImagesAndLabelsAndMasks.collate_fn,
        worker_init_fn=seed_worker,
        generator=generator,
    ), dataset
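

# Usage sketch (illustrative only): the dataset path and values below are hypothetical; in the
# YOLOv5 training scripts `hyp` is a dict loaded from a hyp*.yaml file and must contain the
# augmentation keys used in __getitem__ ('mosaic', 'mixup', 'hsv_h', 'hsv_s', 'hsv_v', 'degrees',
# 'translate', 'scale', 'shear', 'perspective', 'flipud', 'fliplr', 'copy_paste').
#
#   train_loader, dataset = create_dataloader('../datasets/coco128-seg/images/train2017',
#                                             imgsz=640,
#                                             batch_size=16,
#                                             stride=32,
#                                             hyp=hyp,
#                                             augment=True,
#                                             shuffle=True,
#                                             mask_downsample_ratio=4,
#                                             overlap_mask=True)
#   imgs, targets, paths, shapes, masks = next(iter(train_loader))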
|
|
class LoadImagesAndLabelsAndMasks(LoadImagesAndLabels):  # for training/testing

    def __init__(
        self,
        path,
        img_size=640,
        batch_size=16,
        augment=False,
        hyp=None,
        rect=False,
        image_weights=False,
        cache_images=False,
        single_cls=False,
        stride=32,
        pad=0,
        min_items=0,
        prefix='',
        downsample_ratio=1,
        overlap=False,
        rank=-1,
        seed=0,
    ):
        super().__init__(path, img_size, batch_size, augment, hyp, rect, image_weights, cache_images, single_cls,
                         stride, pad, min_items, prefix, rank, seed)
        self.downsample_ratio = downsample_ratio
        self.overlap = overlap
|
|
    def __getitem__(self, index):
        index = self.indices[index]  # linear, shuffled, or image_weights

        hyp = self.hyp
        mosaic = self.mosaic and random.random() < hyp['mosaic']
        masks = []
        if mosaic:
            # Load mosaic
            img, labels, segments = self.load_mosaic(index)
            shapes = None

            # MixUp augmentation
            if random.random() < hyp['mixup']:
                img, labels, segments = mixup(img, labels, segments, *self.load_mosaic(random.randint(0, self.n - 1)))

        else:
            # Load image
            img, (h0, w0), (h, w) = self.load_image(index)

            # Letterbox
            shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size  # final letterboxed shape
            img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
            shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling

            labels = self.labels[index].copy()
            # self.segments[index]: [array, array, ...], each array.shape = (num_points, 2), normalized xy polygon points
            segments = self.segments[index].copy()
            if len(segments):
                for i_s in range(len(segments)):
                    segments[i_s] = xyn2xy(
                        segments[i_s],
                        ratio[0] * w,
                        ratio[1] * h,
                        padw=pad[0],
                        padh=pad[1],
                    )
            if labels.size:  # normalized xywh to pixel xyxy format
                labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1])

            if self.augment:
                img, labels, segments = random_perspective(img,
                                                           labels,
                                                           segments=segments,
                                                           degrees=hyp['degrees'],
                                                           translate=hyp['translate'],
                                                           scale=hyp['scale'],
                                                           shear=hyp['shear'],
                                                           perspective=hyp['perspective'])

        nl = len(labels)  # number of labels
        if nl:
            labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1e-3)
            if self.overlap:
                masks, sorted_idx = polygons2masks_overlap(img.shape[:2],
                                                           segments,
                                                           downsample_ratio=self.downsample_ratio)
                masks = masks[None]  # (H, W) -> (1, H, W)
                labels = labels[sorted_idx]
            else:
                masks = polygons2masks(img.shape[:2], segments, color=1, downsample_ratio=self.downsample_ratio)

        masks = (torch.from_numpy(masks) if len(masks) else torch.zeros(
            1 if self.overlap else nl, img.shape[0] // self.downsample_ratio, img.shape[1] // self.downsample_ratio))
        # TODO: albumentations support
        if self.augment:
            # Albumentations
            # Some augmentations won't change boxes and masks, so keep this as-is for now
            img, labels = self.albumentations(img, labels)
            nl = len(labels)  # update after albumentations

            # HSV color-space
            augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])

            # Flip up-down
            if random.random() < hyp['flipud']:
                img = np.flipud(img)
                if nl:
                    labels[:, 2] = 1 - labels[:, 2]
                    masks = torch.flip(masks, dims=[1])

            # Flip left-right
            if random.random() < hyp['fliplr']:
                img = np.fliplr(img)
                if nl:
                    labels[:, 1] = 1 - labels[:, 1]
                    masks = torch.flip(masks, dims=[2])

            # Cutouts  # labels = cutout(img, labels, p=0.5)

        labels_out = torch.zeros((nl, 6))
        if nl:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # Convert
        img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
        img = np.ascontiguousarray(img)

        return (torch.from_numpy(img), labels_out, self.im_files[index], shapes, masks)
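
    # Shapes returned by __getitem__ (for reference; sizes assume downsample_ratio = r):
    #   img:        uint8 tensor, (3, H, W), RGB; conversion to float and scaling is left to the caller
    #   labels_out: (nl, 6) = (batch_index placeholder, cls, x, y, w, h) with normalized xywh
    #   masks:      (1, H // r, W // r) when overlap=True (one index mask per image),
    #               (nl, H // r, W // r) otherwise (one binary mask per instance)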
|
|
    def load_mosaic(self, index):
        # YOLOv5 4-mosaic loader. Loads 1 image + 3 random images into a 4-image mosaic
        labels4, segments4 = [], []
        s = self.img_size
        yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border)  # mosaic center x, y

        # 3 additional image indices
        indices = [index] + random.choices(self.indices, k=3)
        for i, index in enumerate(indices):
            # Load image
            img, _, (h, w) = self.load_image(index)

            # Place img in img4
            if i == 0:  # top left
                img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
                x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
                x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
            elif i == 1:  # top right
                x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
                x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
            elif i == 2:  # bottom left
                x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
                x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
            elif i == 3:  # bottom right
                x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
                x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)

            img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
            padw = x1a - x1b
            padh = y1a - y1b

            labels, segments = self.labels[index].copy(), self.segments[index].copy()

            if labels.size:
                labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh)  # normalized xywh to pixel xyxy format
                segments = [xyn2xy(x, w, h, padw, padh) for x in segments]
            labels4.append(labels)
            segments4.extend(segments)

        # Concat/clip labels
        labels4 = np.concatenate(labels4, 0)
        for x in (labels4[:, 1:], *segments4):
            np.clip(x, 0, 2 * s, out=x)  # clip when using random_perspective()
        # img4, labels4 = replicate(img4, labels4)  # replicate

        # Augment
        img4, labels4, segments4 = copy_paste(img4, labels4, segments4, p=self.hyp['copy_paste'])
        img4, labels4, segments4 = random_perspective(img4,
                                                      labels4,
                                                      segments4,
                                                      degrees=self.hyp['degrees'],
                                                      translate=self.hyp['translate'],
                                                      scale=self.hyp['scale'],
                                                      shear=self.hyp['shear'],
                                                      perspective=self.hyp['perspective'],
                                                      border=self.mosaic_border)  # border to remove
        return img4, labels4, segments4
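
    # Note: the mosaic is assembled on a 2s x 2s canvas; random_perspective() above then uses
    # border=self.mosaic_border (set in the parent LoadImagesAndLabels) to crop the result back
    # down to the training size, transforming labels and segments accordingly.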
|
|
    @staticmethod
    def collate_fn(batch):
        img, label, path, shapes, masks = zip(*batch)  # transposed
        batched_masks = torch.cat(masks, 0)
        for i, l in enumerate(label):
            l[:, 0] = i  # add target image index for build_targets()
        return torch.stack(img, 0), torch.cat(label, 0), path, shapes, batched_masks
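
    # After collation (batch size B, downsample ratio r):
    #   images: (B, 3, H, W); labels: (sum(nl_i), 6) with column 0 holding the image index in the batch;
    #   batched_masks: (B, H // r, W // r) with overlap=True, else (sum(nl_i), H // r, W // r).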
|
|
def polygon2mask(img_size, polygons, color=1, downsample_ratio=1):
    """
    Args:
        img_size (tuple): The image size.
        polygons (np.ndarray): [N, M], N is the number of polygons,
            M is the number of points (must be divisible by 2).
    """
    mask = np.zeros(img_size, dtype=np.uint8)
    polygons = np.asarray(polygons)
    polygons = polygons.astype(np.int32)
    shape = polygons.shape
    polygons = polygons.reshape(shape[0], -1, 2)
    cv2.fillPoly(mask, polygons, color=color)
    nh, nw = (img_size[0] // downsample_ratio, img_size[1] // downsample_ratio)
    # NOTE: fillPoly first and then resize, to keep the loss calculation consistent with the mask-ratio=1 case
    mask = cv2.resize(mask, (nw, nh))
    return mask
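
# Minimal usage sketch (illustrative values): rasterize a single triangle, then downsample 2x.
#
#   poly = np.array([[10., 10., 100., 10., 50., 80.]])  # one polygon, flattened x,y pairs
#   m = polygon2mask((160, 160), poly, color=1, downsample_ratio=2)
#   m.shape  # -> (80, 80); uint8, 1 inside the (downsampled) triangle and 0 elsewhere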
|
|
def polygons2masks(img_size, polygons, color, downsample_ratio=1):
    """
    Args:
        img_size (tuple): The image size.
        polygons (list[np.ndarray]): each polygon is [N, M],
            N is the number of polygons,
            M is the number of points (must be divisible by 2).
    """
    masks = []
    for si in range(len(polygons)):
        mask = polygon2mask(img_size, [polygons[si].reshape(-1)], color, downsample_ratio)
        masks.append(mask)
    return np.array(masks)
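
# The result stacks one binary mask per input polygon: shape
# (len(polygons), img_size[0] // downsample_ratio, img_size[1] // downsample_ratio), dtype uint8.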
|
|
def polygons2masks_overlap(img_size, segments, downsample_ratio=1):
    """Return a single overlap mask of shape (img_size[0] // downsample_ratio, img_size[1] // downsample_ratio)
    and the indices that sort the instances by descending area."""
    masks = np.zeros((img_size[0] // downsample_ratio, img_size[1] // downsample_ratio),
                     dtype=np.int32 if len(segments) > 255 else np.uint8)
    areas = []
    ms = []
    for si in range(len(segments)):
        mask = polygon2mask(
            img_size,
            [segments[si].reshape(-1)],
            downsample_ratio=downsample_ratio,
            color=1,
        )
        ms.append(mask)
        areas.append(mask.sum())
    areas = np.asarray(areas)
    index = np.argsort(-areas)
    ms = np.array(ms)[index]
    for i in range(len(segments)):
        mask = ms[i] * (i + 1)
        masks = masks + mask
        masks = np.clip(masks, a_min=0, a_max=i + 1)
    return masks, index
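
# Encoding of the overlap mask: 0 is background and value i + 1 marks the i-th instance after
# sorting by descending area (the returned `index` lets the caller reorder its labels to match,
# as __getitem__ does with `sorted_idx`). Because each instance is added and then clipped to its
# own id, a pixel covered by several instances ends up labelled with the smallest one.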