
utils/segment/dataloaders.py
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
"""
Dataloaders
"""

import os
import random

import cv2
import numpy as np
import torch
from torch.utils.data import DataLoader, distributed

from ..augmentations import augment_hsv, copy_paste, letterbox
from ..dataloaders import InfiniteDataLoader, LoadImagesAndLabels, SmartDistributedSampler, seed_worker
from ..general import LOGGER, xyn2xy, xywhn2xyxy, xyxy2xywhn
from ..torch_utils import torch_distributed_zero_first
from .augmentations import mixup, random_perspective

RANK = int(os.getenv('RANK', -1))

def create_dataloader(path,
                      imgsz,
                      batch_size,
                      stride,
                      single_cls=False,
                      hyp=None,
                      augment=False,
                      cache=False,
                      pad=0.0,
                      rect=False,
                      rank=-1,
                      workers=8,
                      image_weights=False,
                      quad=False,
                      prefix='',
                      shuffle=False,
                      mask_downsample_ratio=1,
                      overlap_mask=False,
                      seed=0):
    if rect and shuffle:
        LOGGER.warning('WARNING ⚠️ --rect is incompatible with DataLoader shuffle, setting shuffle=False')
        shuffle = False
    with torch_distributed_zero_first(rank):  # init dataset *.cache only once if DDP
        dataset = LoadImagesAndLabelsAndMasks(
            path,
            imgsz,
            batch_size,
            augment=augment,  # augmentation
            hyp=hyp,  # hyperparameters
            rect=rect,  # rectangular batches
            cache_images=cache,
            single_cls=single_cls,
            stride=int(stride),
            pad=pad,
            image_weights=image_weights,
            prefix=prefix,
            downsample_ratio=mask_downsample_ratio,
            overlap=overlap_mask,
            rank=rank)

    batch_size = min(batch_size, len(dataset))
    nd = torch.cuda.device_count()  # number of CUDA devices
    nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers])  # number of workers
    sampler = None if rank == -1 else SmartDistributedSampler(dataset, shuffle=shuffle)
    loader = DataLoader if image_weights else InfiniteDataLoader  # only DataLoader allows for attribute updates
    generator = torch.Generator()
    generator.manual_seed(6148914691236517205 + seed + RANK)
    return loader(
        dataset,
        batch_size=batch_size,
        shuffle=shuffle and sampler is None,
        num_workers=nw,
        sampler=sampler,
        pin_memory=True,
        collate_fn=LoadImagesAndLabelsAndMasks.collate_fn4 if quad else LoadImagesAndLabelsAndMasks.collate_fn,
        worker_init_fn=seed_worker,
        generator=generator,
    ), dataset
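
# Usage sketch (illustrative; the dataset path and hyp dict below are assumed placeholders,
# not values defined in this module):
#   train_loader, dataset = create_dataloader('path/to/images/train', imgsz=640, batch_size=16,
#                                             stride=32, hyp=hyp, augment=True,
#                                             mask_downsample_ratio=4, overlap_mask=True)
#   for imgs, targets, paths, shapes, masks in train_loader:
#       ...  # imgs: (B, 3, H, W); targets: (num_labels, 6); masks: concatenated per-image masks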


class LoadImagesAndLabelsAndMasks(LoadImagesAndLabels):  # for training/testing

    def __init__(
        self,
        path,
        img_size=640,
        batch_size=16,
        augment=False,
        hyp=None,
        rect=False,
        image_weights=False,
        cache_images=False,
        single_cls=False,
        stride=32,
        pad=0,
        min_items=0,
        prefix='',
        downsample_ratio=1,
        overlap=False,
        rank=-1,
        seed=0,
    ):
        super().__init__(path, img_size, batch_size, augment, hyp, rect, image_weights, cache_images, single_cls,
                         stride, pad, min_items, prefix, rank, seed)
        self.downsample_ratio = downsample_ratio
        self.overlap = overlap

    def __getitem__(self, index):
        index = self.indices[index]  # linear, shuffled, or image_weights

        hyp = self.hyp
        mosaic = self.mosaic and random.random() < hyp['mosaic']
        masks = []
        if mosaic:
            # Load mosaic
            img, labels, segments = self.load_mosaic(index)
            shapes = None

            # MixUp augmentation
            if random.random() < hyp['mixup']:
                img, labels, segments = mixup(img, labels, segments, *self.load_mosaic(random.randint(0, self.n - 1)))

        else:
            # Load image
            img, (h0, w0), (h, w) = self.load_image(index)

            # Letterbox
            shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size  # final letterboxed shape
            img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
            shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling

            labels = self.labels[index].copy()
            # segments: list of arrays, each of shape (num_points, 2), normalized xy point sequences
            segments = self.segments[index].copy()
            if len(segments):
                for i_s in range(len(segments)):
                    segments[i_s] = xyn2xy(
                        segments[i_s],
                        ratio[0] * w,
                        ratio[1] * h,
                        padw=pad[0],
                        padh=pad[1],
                    )
            if labels.size:  # normalized xywh to pixel xyxy format
                labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1])

            if self.augment:
                img, labels, segments = random_perspective(img,
                                                           labels,
                                                           segments=segments,
                                                           degrees=hyp['degrees'],
                                                           translate=hyp['translate'],
                                                           scale=hyp['scale'],
                                                           shear=hyp['shear'],
                                                           perspective=hyp['perspective'])

        nl = len(labels)  # number of labels
        if nl:
            labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1e-3)
            if self.overlap:
                masks, sorted_idx = polygons2masks_overlap(img.shape[:2],
                                                           segments,
                                                           downsample_ratio=self.downsample_ratio)
                masks = masks[None]  # add channel dim: (h, w) -> (1, h, w)
                labels = labels[sorted_idx]
            else:
                masks = polygons2masks(img.shape[:2], segments, color=1, downsample_ratio=self.downsample_ratio)

        masks = (torch.from_numpy(masks) if len(masks) else torch.zeros(1 if self.overlap else nl, img.shape[0] //
                                                                        self.downsample_ratio, img.shape[1] //
                                                                        self.downsample_ratio))
        # TODO: albumentations support
        if self.augment:
            # Albumentations
            # some augmentations do not alter boxes or masks, so masks are left unchanged for now
            img, labels = self.albumentations(img, labels)
            nl = len(labels)  # update after albumentations

            # HSV color-space
            augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])

            # Flip up-down
            if random.random() < hyp['flipud']:
                img = np.flipud(img)
                if nl:
                    labels[:, 2] = 1 - labels[:, 2]
                    masks = torch.flip(masks, dims=[1])

            # Flip left-right
            if random.random() < hyp['fliplr']:
                img = np.fliplr(img)
                if nl:
                    labels[:, 1] = 1 - labels[:, 1]
                    masks = torch.flip(masks, dims=[2])

            # Cutouts  # labels = cutout(img, labels, p=0.5)

        labels_out = torch.zeros((nl, 6))
        if nl:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # Convert
        img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
        img = np.ascontiguousarray(img)

        return (torch.from_numpy(img), labels_out, self.im_files[index], shapes, masks)
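
    # Returned sample (illustrative note): (img CHW RGB uint8 tensor, labels_out (nl, 6),
    # image path, shapes for eval rescaling, masks); with overlap=True masks is (1, H/r, W/r)
    # holding instance indices, otherwise (nl, H/r, W/r) binary masks, r = downsample_ratio.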

    def load_mosaic(self, index):
        # YOLOv5 4-mosaic loader. Loads 1 image + 3 random images into a 4-image mosaic
        labels4, segments4 = [], []
        s = self.img_size
        yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border)  # mosaic center x, y

        indices = [index] + random.choices(self.indices, k=3)  # 3 additional image indices
        for i, index in enumerate(indices):
            # Load image
            img, _, (h, w) = self.load_image(index)

            # Place img in img4
            if i == 0:  # top left
                img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
                x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
                x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
            elif i == 1:  # top right
                x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
                x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
            elif i == 2:  # bottom left
                x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
                x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
            elif i == 3:  # bottom right
                x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
                x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)

            img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
            padw = x1a - x1b
            padh = y1a - y1b

            labels, segments = self.labels[index].copy(), self.segments[index].copy()

            if labels.size:
                labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh)  # normalized xywh to pixel xyxy format
                segments = [xyn2xy(x, w, h, padw, padh) for x in segments]
            labels4.append(labels)
            segments4.extend(segments)

        # Concat/clip labels
        labels4 = np.concatenate(labels4, 0)
        for x in (labels4[:, 1:], *segments4):
            np.clip(x, 0, 2 * s, out=x)  # clip when using random_perspective()
        # img4, labels4 = replicate(img4, labels4)  # replicate

        # Augment
        img4, labels4, segments4 = copy_paste(img4, labels4, segments4, p=self.hyp['copy_paste'])
        img4, labels4, segments4 = random_perspective(img4,
                                                      labels4,
                                                      segments4,
                                                      degrees=self.hyp['degrees'],
                                                      translate=self.hyp['translate'],
                                                      scale=self.hyp['scale'],
                                                      shear=self.hyp['shear'],
                                                      perspective=self.hyp['perspective'],
                                                      border=self.mosaic_border)  # border to remove
        return img4, labels4, segments4
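
    # Geometry note (illustrative): the 4 tiles are placed on a 2s x 2s canvas around the
    # random center (xc, yc); random_perspective(..., border=self.mosaic_border) then crops
    # the canvas back to s x s, so the returned mosaic matches self.img_size.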

    @staticmethod
    def collate_fn(batch):
        img, label, path, shapes, masks = zip(*batch)  # transposed
        batched_masks = torch.cat(masks, 0)
        for i, l in enumerate(label):
            l[:, 0] = i  # add target image index for build_targets()
        return torch.stack(img, 0), torch.cat(label, 0), path, shapes, batched_masks
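
# Note (illustrative): after collate_fn, each row of the returned label tensor is
# (batch_image_index, class, x, y, w, h) with normalized xywh, and batched_masks simply
# concatenates the per-image mask tensors along dim 0.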


def polygon2mask(img_size, polygons, color=1, downsample_ratio=1):
    """
    Args:
        img_size (tuple): The image size as (height, width).
        polygons (np.ndarray): [N, M], where N is the number of polygons and
            M is the number of coordinate values (2 * number of points).
    """
    mask = np.zeros(img_size, dtype=np.uint8)
    polygons = np.asarray(polygons)
    polygons = polygons.astype(np.int32)
    shape = polygons.shape
    polygons = polygons.reshape(shape[0], -1, 2)
    cv2.fillPoly(mask, polygons, color=color)
    nh, nw = (img_size[0] // downsample_ratio, img_size[1] // downsample_ratio)
    # NOTE: fillPoly first, then resize, to keep loss calculation consistent with mask-ratio=1
    mask = cv2.resize(mask, (nw, nh))
    return mask
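
# Example (illustrative sketch; the triangle below is a made-up polygon):
#   poly = np.array([[10., 10., 100., 10., 55., 90.]])  # one polygon as flattened xy pairs
#   m = polygon2mask((160, 160), poly, color=1, downsample_ratio=2)
#   m.shape  # -> (80, 80), uint8, with 1 inside the triangle and 0 elsewhere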


def polygons2masks(img_size, polygons, color, downsample_ratio=1):
    """
    Args:
        img_size (tuple): The image size as (height, width).
        polygons (list[np.ndarray]): one array per instance; each array is [N, M],
            where N is the number of polygons and M is the number of coordinate
            values (2 * number of points).
    """
    masks = []
    for si in range(len(polygons)):
        mask = polygon2mask(img_size, [polygons[si].reshape(-1)], color, downsample_ratio)
        masks.append(mask)
    return np.array(masks)
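
# Example (illustrative sketch): for K instance segments, polygons2masks returns a
# (K, H // downsample_ratio, W // downsample_ratio) uint8 array, one binary mask per instance:
#   masks = polygons2masks((160, 160), [poly0, poly1], color=1, downsample_ratio=2)  # poly0/poly1: (num_points, 2)
#   masks.shape  # -> (2, 80, 80)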


def polygons2masks_overlap(img_size, segments, downsample_ratio=1):
    """Return a single overlap mask of shape (img_size[0] // downsample_ratio, img_size[1] // downsample_ratio),
    where each pixel holds the 1-based index of the instance covering it (0 = background)."""
    masks = np.zeros((img_size[0] // downsample_ratio, img_size[1] // downsample_ratio),
                     dtype=np.int32 if len(segments) > 255 else np.uint8)
    areas = []
    ms = []
    for si in range(len(segments)):
        mask = polygon2mask(
            img_size,
            [segments[si].reshape(-1)],
            downsample_ratio=downsample_ratio,
            color=1,
        )
        ms.append(mask)
        areas.append(mask.sum())
    areas = np.asarray(areas)
    index = np.argsort(-areas)  # instance indices sorted by descending mask area
    ms = np.array(ms)[index]
    for i in range(len(segments)):
        mask = ms[i] * (i + 1)
        masks = masks + mask
        masks = np.clip(masks, a_min=0, a_max=i + 1)
    return masks, index
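
# Semantics note (illustrative): instances are drawn largest-first, and the running clip to
# a_max=i + 1 lets each smaller instance overwrite larger ones where they overlap, so every
# pixel ends up labeled with its smallest covering instance. 'index' is the area-descending
# ordering, which __getitem__ uses to reorder labels to match the mask indices.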