Diff of /yolov5/utils/datasets.py [000000] .. [f26a44]

Switch to unified view

a b/yolov5/utils/datasets.py
1
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2
"""
3
Dataloaders and dataset utils
4
"""
5
6
import glob
7
import hashlib
8
import json
9
import os
10
import random
11
import shutil
12
import time
13
from itertools import repeat
14
from multiprocessing.pool import Pool, ThreadPool
15
from pathlib import Path
16
from threading import Thread
17
from zipfile import ZipFile
18
19
import cv2
20
import numpy as np
21
import torch
22
import torch.nn.functional as F
23
import yaml
24
from PIL import ExifTags, Image, ImageOps
25
from torch.utils.data import DataLoader, Dataset, dataloader, distributed
26
from tqdm import tqdm
27
28
from utils.augmentations import Albumentations, augment_hsv, copy_paste, letterbox, mixup, random_perspective
29
from utils.general import (LOGGER, check_dataset, check_requirements, check_yaml, clean_str, segments2boxes, xyn2xy,
30
                           xywh2xyxy, xywhn2xyxy, xyxy2xywhn)
31
from utils.torch_utils import torch_distributed_zero_first
32
33
# Parameters
HELP_URL = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'
IMG_FORMATS = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng', 'webp', 'mpo']  # acceptable image suffixes
VID_FORMATS = ['mov', 'avi', 'mp4', 'mpg', 'mpeg', 'm4v', 'wmv', 'mkv']  # acceptable video suffixes
WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1))  # DDP world size, read from the environment (defaults to 1)
# os.cpu_count() returns None when the CPU count cannot be determined; treat that as a single CPU
NUM_THREADS = min(8, max(1, (os.cpu_count() or 1) - 1))  # number of multiprocessing threads
39
40
# Look up the numeric EXIF tag id named 'Orientation'; the loop variable is left
# bound at module level as `orientation` for later use
for orientation in ExifTags.TAGS:
    if ExifTags.TAGS[orientation] == 'Orientation':
        break
44
45
46
def get_hash(paths):
    """Return a single MD5 hex digest for a list of paths (files or dirs).

    The digest is seeded with the summed on-disk size of every path that
    exists and is then updated with the concatenation of all path strings.
    """
    total_size = 0
    for path in paths:
        if os.path.exists(path):  # paths that do not exist contribute no size
            total_size += os.path.getsize(path)
    digest = hashlib.md5(str(total_size).encode())  # hash sizes
    digest.update(''.join(paths).encode())  # hash paths
    return digest.hexdigest()
52
53
54
def exif_size(img):
    """Return the EXIF-corrected size of a PIL image as (width, height).

    Width and height are swapped when the EXIF Orientation tag is 6 or 8
    (quarter-turn rotations). Reading EXIF is best-effort: if the image has no
    `_getexif`, no EXIF data, no Orientation tag, or the metadata cannot be
    parsed, the uncorrected `img.size` is returned.

    :param img: Object exposing `.size` and (optionally) `._getexif()`.
    :return: (width, height) tuple.
    """
    size = img.size  # (width, height)
    try:
        exif = img._getexif()
        # 0x0112 is the EXIF Orientation tag id (same constant used by exif_transpose)
        rotation = dict(exif.items()).get(0x0112) if exif else None
    except Exception:  # best-effort: broken/missing EXIF must not abort; no longer swallows KeyboardInterrupt
        rotation = None

    if rotation in (6, 8):  # rotation 270 or 90
        size = (size[1], size[0])
    return size
67
68
69
def exif_transpose(image):
    """
    Transpose a PIL image accordingly if it has an EXIF Orientation tag.
    Inplace version of https://github.com/python-pillow/Pillow/blob/master/src/PIL/ImageOps.py exif_transpose()

    When a transpose is applied, the Orientation tag is removed from the EXIF
    data and the re-serialised EXIF bytes are stored in `image.info["exif"]`.

    :param image: The image to transpose.
    :return: An image.
    """
    exif = image.getexif()
    orientation = exif.get(0x0112, 1)  # default 1
    if orientation <= 1:
        return image  # nothing to correct

    transforms = {
        2: Image.FLIP_LEFT_RIGHT,
        3: Image.ROTATE_180,
        4: Image.FLIP_TOP_BOTTOM,
        5: Image.TRANSPOSE,
        6: Image.ROTATE_270,
        7: Image.TRANSVERSE,
        8: Image.ROTATE_90,
    }
    method = transforms.get(orientation)
    if method is None:
        return image  # unknown orientation value: leave the image untouched

    image = image.transpose(method)
    del exif[0x0112]
    image.info["exif"] = exif.tobytes()
    return image
93
94
95
def create_dataloader(path, imgsz, batch_size, stride, single_cls=False, hyp=None, augment=False, cache=False, pad=0.0,
                      rect=False, rank=-1, workers=8, image_weights=False, quad=False, prefix='', shuffle=False):
    """Build a LoadImagesAndLabels dataset and a loader over it.

    Returns a `(loader, dataset)` tuple. The loader is a plain `DataLoader`
    when `image_weights` is set and an `InfiniteDataLoader` otherwise. When
    `rank != -1` a `DistributedSampler` is used and loader-level shuffling is
    disabled. `rect` and `shuffle` together log a warning and force
    `shuffle=False`.
    """
    if rect and shuffle:
        LOGGER.warning('WARNING: --rect is incompatible with DataLoader shuffle, setting shuffle=False')
        shuffle = False

    dataset_options = dict(
        augment=augment,  # augmentation
        hyp=hyp,  # hyperparameters
        rect=rect,  # rectangular batches
        cache_images=cache,
        single_cls=single_cls,
        stride=int(stride),
        pad=pad,
        image_weights=image_weights,
        prefix=prefix,
    )
    with torch_distributed_zero_first(rank):  # init dataset *.cache only once if DDP
        dataset = LoadImagesAndLabels(path, imgsz, batch_size, **dataset_options)

    batch_size = min(batch_size, len(dataset))
    # number of workers: limited by CPUs per process, by the batch size (0 workers for batch size <= 1), and by `workers`
    worker_limits = [os.cpu_count() // WORLD_SIZE, batch_size if batch_size > 1 else 0, workers]
    nw = min(worker_limits)

    if rank == -1:
        sampler = None
    else:
        sampler = distributed.DistributedSampler(dataset, shuffle=shuffle)

    loader_cls = DataLoader if image_weights else InfiniteDataLoader  # only DataLoader allows for attribute updates
    collate = LoadImagesAndLabels.collate_fn4 if quad else LoadImagesAndLabels.collate_fn
    loader = loader_cls(dataset,
                        batch_size=batch_size,
                        shuffle=shuffle and sampler is None,
                        num_workers=nw,
                        sampler=sampler,
                        pin_memory=True,
                        collate_fn=collate)
    return loader, dataset
123
124
125
class InfiniteDataLoader(dataloader.DataLoader):
    """ Dataloader that reuses workers

    Uses same syntax as vanilla DataLoader. The batch sampler is wrapped in a
    `_RepeatSampler` and a single underlying iterator is created once in
    `__init__`; every `__iter__` call draws `len(self)` batches from that
    same iterator.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Assigned through object.__setattr__ rather than normal attribute assignment
        # NOTE(review): presumably to bypass DataLoader's own __setattr__ guard - confirm against torch docs
        repeating = _RepeatSampler(self.batch_sampler)
        object.__setattr__(self, 'batch_sampler', repeating)
        self.iterator = super().__iter__()

    def __len__(self):
        # length of the wrapped (original) batch sampler, i.e. batches per pass
        return len(self.batch_sampler.sampler)

    def __iter__(self):
        remaining = len(self)
        while remaining > 0:
            yield next(self.iterator)
            remaining -= 1
142
143
144
class _RepeatSampler:
145
    """ Sampler that repeats forever
146
147
    Args:
148
        sampler (Sampler)
149
    """
150
151
    def __init__(self, sampler):
152
        self.sampler = sampler
153
154
    def __iter__(self):
155
        while True:
156
            yield from iter(self.sampler)
157
158
159
class LoadImages:
    """YOLOv5 image/video dataloader, i.e. `python detect.py --source image.jpg/vid.mp4`

    Iterating yields `(path, img, img0, cap, s)` where `img0` is the BGR frame
    as read by OpenCV, `img` is the letterboxed CHW RGB array, `cap` is the
    current `cv2.VideoCapture` (or None when no video was found) and `s` is a
    progress string. All images are yielded before any videos.
    """

    def __init__(self, path, img_size=640, stride=32, auto=True):
        """Collect image/video files from a glob pattern, a directory or a single file.

        :param path: Glob pattern (contains '*'), directory, or file path.
        :param img_size: Target size passed to `letterbox`.
        :param stride: Stride passed to `letterbox`.
        :param auto: `auto` flag passed to `letterbox`.
        :raises FileNotFoundError: if `path` is neither a glob, a directory nor a file.
        :raises AssertionError: if no file with a supported suffix was found.
        """
        p = str(Path(path).resolve())  # os-agnostic absolute path
        if '*' in p:
            files = sorted(glob.glob(p, recursive=True))  # glob
        elif os.path.isdir(p):
            files = sorted(glob.glob(os.path.join(p, '*.*')))  # dir
        elif os.path.isfile(p):
            files = [p]  # files
        else:
            # FileNotFoundError is a subclass of Exception, so existing `except Exception` callers still work
            raise FileNotFoundError(f'ERROR: {p} does not exist')

        images = [x for x in files if x.split('.')[-1].lower() in IMG_FORMATS]
        videos = [x for x in files if x.split('.')[-1].lower() in VID_FORMATS]
        ni, nv = len(images), len(videos)

        self.img_size = img_size
        self.stride = stride
        self.files = images + videos
        self.nf = ni + nv  # number of files
        self.video_flag = [False] * ni + [True] * nv
        self.mode = 'image'
        self.auto = auto
        if videos:
            self.new_video(videos[0])  # new video
        else:
            self.cap = None
        assert self.nf > 0, f'No images or videos found in {p}. ' \
                            f'Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}'

    def __iter__(self):
        self.count = 0
        return self

    def __next__(self):
        """Return the next image, or the next frame of the current video."""
        if self.count == self.nf:
            raise StopIteration
        path = self.files[self.count]

        if self.video_flag[self.count]:
            # Read video
            self.mode = 'video'
            ret_val, img0 = self.cap.read()
            # Advance through the remaining videos until one yields a frame. Looping (instead of a
            # single retry) prevents an unreadable/empty next video from passing img0=None to letterbox.
            while not ret_val:
                self.count += 1
                self.cap.release()
                if self.count == self.nf:  # last video
                    raise StopIteration
                path = self.files[self.count]
                self.new_video(path)
                ret_val, img0 = self.cap.read()

            self.frame += 1
            s = f'video {self.count + 1}/{self.nf} ({self.frame}/{self.frames}) {path}: '

        else:
            # Read image
            self.count += 1
            img0 = cv2.imread(path)  # BGR
            assert img0 is not None, f'Image Not Found {path}'
            s = f'image {self.count}/{self.nf} {path}: '

        # Padded resize
        img = letterbox(img0, self.img_size, stride=self.stride, auto=self.auto)[0]

        # Convert
        img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
        img = np.ascontiguousarray(img)

        return path, img, img0, self.cap, s

    def new_video(self, path):
        """Open `path` as the current video and reset the frame counter."""
        self.frame = 0
        self.cap = cv2.VideoCapture(path)
        self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))

    def __len__(self):
        return self.nf  # number of files
239
240
241
class LoadWebcam:  # for inference
    """YOLOv5 local webcam dataloader, i.e. `python detect.py --source 0`

    Iterating yields `('webcam.jpg', img, img0, None, s)` where `img0` is the
    left-right flipped BGR frame and `img` is the letterboxed CHW RGB array.
    Iteration stops when the 'q' key is detected via `cv2.waitKey`.
    """

    def __init__(self, pipe='0', img_size=640, stride=32):
        """Open the capture device.

        :param pipe: Device index as a numeric string (e.g. '0'), or any other string
                     passed unchanged to `cv2.VideoCapture`.
        :param img_size: Target size passed to `letterbox`.
        :param stride: Stride passed to `letterbox`.
        """
        self.img_size = img_size
        self.stride = stride
        # int() instead of eval(): same result for digit strings, without evaluating arbitrary text
        self.pipe = int(pipe) if pipe.isnumeric() else pipe
        self.cap = cv2.VideoCapture(self.pipe)  # video capture object
        self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 3)  # set buffer size

    def __iter__(self):
        self.count = -1
        return self

    def __next__(self):
        self.count += 1
        if cv2.waitKey(1) == ord('q'):  # q to quit
            self.cap.release()
            cv2.destroyAllWindows()
            raise StopIteration

        # Read frame; validate BEFORE flipping so a failed read reports 'Camera Error'
        # instead of failing inside cv2.flip on a None frame
        ret_val, img0 = self.cap.read()
        assert ret_val, f'Camera Error {self.pipe}'
        img0 = cv2.flip(img0, 1)  # flip left-right

        # Print
        img_path = 'webcam.jpg'
        s = f'webcam {self.count}: '

        # Padded resize
        img = letterbox(img0, self.img_size, stride=self.stride)[0]

        # Convert
        img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
        img = np.ascontiguousarray(img)

        return img_path, img, img0, None, s

    def __len__(self):
        return 0
281
282
283
class LoadStreams:
    """YOLOv5 streamloader, i.e. `python detect.py --source 'rtsp://example.com/media.mp4'  # RTSP, RTMP, HTTP streams`

    One daemon thread per source keeps `self.imgs[i]` updated with the latest
    frame. Iterating yields `(sources, img, img0, None, '')` where `img0` is a
    list of the latest BGR frames and `img` is the stacked, letterboxed BCHW
    RGB array.
    """

    def __init__(self, sources='streams.txt', img_size=640, stride=32, auto=True):
        """Open every source and start its reader thread.

        :param sources: Path to a text file with one source per line, or a single source string.
        :param img_size: Target size passed to `letterbox`.
        :param stride: Stride passed to `letterbox`.
        :param auto: `auto` flag passed to `letterbox` (only honoured when all streams share a shape).
        :raises AssertionError: if a source cannot be opened.
        """
        self.mode = 'stream'
        self.img_size = img_size
        self.stride = stride

        if os.path.isfile(sources):
            with open(sources) as f:
                sources = [x.strip() for x in f.read().strip().splitlines() if len(x.strip())]
        else:
            sources = [sources]

        n = len(sources)
        self.imgs, self.fps, self.frames, self.threads = [None] * n, [0] * n, [0] * n, [None] * n
        self.sources = [clean_str(x) for x in sources]  # clean source names for later
        self.auto = auto
        for i, s in enumerate(sources):  # index, source
            # Start thread to read frames from video stream
            st = f'{i + 1}/{n}: {s}... '
            if 'youtube.com/' in s or 'youtu.be/' in s:  # if source is YouTube video
                check_requirements(('pafy', 'youtube_dl'))
                import pafy
                s = pafy.new(s).getbest(preftype="mp4").url  # YouTube URL
            # int() instead of eval(): sources may come from a user-supplied file and must not be evaluated as code
            s = int(s) if s.isnumeric() else s  # i.e. s = '0' local webcam
            cap = cv2.VideoCapture(s)
            assert cap.isOpened(), f'{st}Failed to open {s}'
            w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            self.fps[i] = max(cap.get(cv2.CAP_PROP_FPS) % 100, 0) or 30.0  # 30 FPS fallback
            self.frames[i] = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), 0) or float('inf')  # infinite stream fallback

            _, self.imgs[i] = cap.read()  # guarantee first frame
            self.threads[i] = Thread(target=self.update, args=(i, cap, s), daemon=True)
            LOGGER.info(f"{st} Success ({self.frames[i]} frames {w}x{h} at {self.fps[i]:.2f} FPS)")
            self.threads[i].start()
        LOGGER.info('')  # newline

        # check for common shapes
        s = np.stack([letterbox(x, self.img_size, stride=self.stride, auto=self.auto)[0].shape for x in self.imgs])
        self.rect = np.unique(s, axis=0).shape[0] == 1  # rect inference if all shapes equal
        if not self.rect:
            LOGGER.warning('WARNING: Stream shapes differ. For optimal performance supply similarly-shaped streams.')

    def update(self, i, cap, stream):
        """Read stream `i` frames in daemon thread until the capture closes or the frame count is reached."""
        n, f, read = 0, self.frames[i], 1  # frame number, frame count limit, inference every 'read' frame
        while cap.isOpened() and n < f:
            n += 1
            cap.grab()
            if n % read == 0:
                success, im = cap.retrieve()
                if success:
                    self.imgs[i] = im
                else:
                    LOGGER.warning('WARNING: Video stream unresponsive, please check your IP camera connection.')
                    # Replace with a NEW black frame instead of zeroing in place: __next__ hands out the same
                    # array objects (list.copy() is shallow), so `*= 0` would blank a frame already returned
                    # to the consumer. Also skip when no frame was ever read (None).
                    if self.imgs[i] is not None:
                        self.imgs[i] = np.zeros_like(self.imgs[i])
                    cap.open(stream)  # re-open stream if signal was lost
            time.sleep(1 / self.fps[i])  # wait time

    def __iter__(self):
        self.count = -1
        return self

    def __next__(self):
        self.count += 1
        if not all(x.is_alive() for x in self.threads) or cv2.waitKey(1) == ord('q'):  # q to quit
            cv2.destroyAllWindows()
            raise StopIteration

        # Letterbox
        img0 = self.imgs.copy()
        img = [letterbox(x, self.img_size, stride=self.stride, auto=self.rect and self.auto)[0] for x in img0]

        # Stack
        img = np.stack(img, 0)

        # Convert
        img = img[..., ::-1].transpose((0, 3, 1, 2))  # BGR to RGB, BHWC to BCHW
        img = np.ascontiguousarray(img)

        return self.sources, img, img0, None, ''

    def __len__(self):
        return len(self.sources)  # 1E12 frames = 32 streams at 30 FPS for 30 years
369
370
371
def img2label_paths(img_paths):
    """Define label paths as a function of image paths.

    For each image path the LAST '/images/' path component (using `os.sep`) is
    replaced by '/labels/' and everything after the final '.' is replaced by
    'txt'.
    """
    images_part = f'{os.sep}images{os.sep}'  # /images/ substring
    labels_part = f'{os.sep}labels{os.sep}'  # /labels/ substring
    label_paths = []
    for img_path in img_paths:
        swapped = labels_part.join(img_path.rsplit(images_part, 1))
        without_suffix = swapped.rsplit('.', 1)[0]
        label_paths.append(without_suffix + '.txt')
    return label_paths
375
376
377
class LoadImagesAndLabels(Dataset):
    """YOLOv5 train_loader/val_loader, loads images and labels for training and validation.

    `__getitem__` returns `(image_tensor, labels_out, image_path, shapes)`.
    `labels_out` has 6 columns; column 0 is left at zero for the collate
    functions to fill with the image index, columns 1: hold the label rows.
    """
    cache_version = 0.6  # dataset labels *.cache version

    def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
                 cache_images=False, single_cls=False, stride=32, pad=0.0, prefix=''):
        """Collect image files, load or build the label cache and optionally cache images.

        :param path: Directory, text file listing images, or a list of those.
        :param img_size: Training image size.
        :param batch_size: Batch size, used to assign images to batches for rectangular training.
        :param augment: Enable augmentation (and mosaic when `rect` is off).
        :param hyp: Hyperparameter dict read in `__getitem__`.
        :param rect: Rectangular batches (forced off when `image_weights` is set).
        :param image_weights: Stored on the instance; disables `rect`.
        :param cache_images: Falsy, truthy (cache in RAM) or the string 'disk' (cache as *.npy files).
        :param single_cls: Merge all classes into class 0.
        :param stride: Stride used to round rectangular batch shapes.
        :param pad: Padding term added when computing rectangular batch shapes.
        :param prefix: Prefix for log and error messages.
        :raises Exception: if the image list cannot be built.
        :raises AssertionError: if augmenting without any labels.
        """
        self.img_size = img_size
        self.augment = augment
        self.hyp = hyp
        self.image_weights = image_weights
        self.rect = False if image_weights else rect
        self.mosaic = self.augment and not self.rect  # load 4 images at a time into a mosaic (only during training)
        self.mosaic_border = [-img_size // 2, -img_size // 2]
        self.stride = stride
        self.path = path
        self.albumentations = Albumentations() if augment else None

        try:
            f = []  # image files
            for p in path if isinstance(path, list) else [path]:
                p = Path(p)  # os-agnostic
                if p.is_dir():  # dir
                    f += glob.glob(str(p / '**' / '*.*'), recursive=True)
                elif p.is_file():  # file
                    with open(p) as t:
                        t = t.read().strip().splitlines()
                        parent = str(p.parent) + os.sep
                        f += [x.replace('./', parent) if x.startswith('./') else x for x in t]  # local to global path
                else:
                    raise Exception(f'{prefix}{p} does not exist')
            self.img_files = sorted(x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in IMG_FORMATS)
            assert self.img_files, f'{prefix}No images found'
        except Exception as e:
            raise Exception(f'{prefix}Error loading data from {path}: {e}\nSee {HELP_URL}') from e

        # Check cache
        self.label_files = img2label_paths(self.img_files)  # labels
        # NOTE: `p` is the last path processed by the loop above
        cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache')
        try:
            # NOTE(security): allow_pickle=True unpickles the cache file; only use caches from trusted locations
            cache, exists = np.load(cache_path, allow_pickle=True).item(), True  # load dict
            assert cache['version'] == self.cache_version  # same version
            assert cache['hash'] == get_hash(self.label_files + self.img_files)  # same hash
        except Exception:  # missing, unreadable or stale cache: rebuild (no longer swallows KeyboardInterrupt)
            cache, exists = self.cache_labels(cache_path, prefix), False  # cache

        # Display cache
        nf, nm, ne, nc, n = cache.pop('results')  # found, missing, empty, corrupted, total
        if exists:
            d = f"Scanning '{cache_path}' images and labels... {nf} found, {nm} missing, {ne} empty, {nc} corrupted"
            tqdm(None, desc=prefix + d, total=n, initial=n)  # display cache results
            if cache['msgs']:
                LOGGER.info('\n'.join(cache['msgs']))  # display warnings
        assert nf > 0 or not augment, f'{prefix}No labels in {cache_path}. Can not train without labels. See {HELP_URL}'

        # Read cache
        for k in ('hash', 'version', 'msgs'):  # remove non-image items
            cache.pop(k)
        labels, shapes, segments = zip(*cache.values())
        self.labels = list(labels)
        self.segments = list(segments)  # list (not tuple) so entries can be replaced below
        self.shapes = np.array(shapes, dtype=np.float64)
        self.img_files = list(cache.keys())  # update
        self.label_files = img2label_paths(cache.keys())  # update
        n = len(shapes)  # number of images
        bi = np.floor(np.arange(n) / batch_size).astype(int)  # batch index (np.int was removed in NumPy 1.24)
        nb = bi[-1] + 1  # number of batches
        self.batch = bi  # batch index of image
        self.n = n
        self.indices = range(n)

        # Update labels
        include_class = []  # filter labels to include only these classes (optional)
        include_class_array = np.array(include_class).reshape(1, -1)
        for i, (label, segment) in enumerate(zip(self.labels, self.segments)):
            if include_class:
                j = (label[:, 0:1] == include_class_array).any(1)
                self.labels[i] = label[j]
                if segment:
                    # `segment` is a Python list (its truthiness is tested above), so it cannot be
                    # indexed with a boolean array; filter element-wise instead
                    self.segments[i] = [seg for seg, keep in zip(segment, j) if keep]
            if single_cls:  # single-class training, merge all classes into 0
                self.labels[i][:, 0] = 0
                # Segments are intentionally left untouched: the previous `self.segments[i][:, 0] = 0`
                # raised TypeError on a list. NOTE(review): segments appear to hold only coordinates
                # (they are passed to xyn2xy elsewhere in this file) - confirm against verify_image_label.

        # Rectangular Training
        if self.rect:
            # Sort by aspect ratio
            s = self.shapes  # wh
            ar = s[:, 1] / s[:, 0]  # aspect ratio
            irect = ar.argsort()
            self.img_files = [self.img_files[i] for i in irect]
            self.label_files = [self.label_files[i] for i in irect]
            self.labels = [self.labels[i] for i in irect]
            self.segments = [self.segments[i] for i in irect]  # keep segments aligned with labels
            self.shapes = s[irect]  # wh
            ar = ar[irect]

            # Set training image shapes
            shapes = [[1, 1]] * nb
            for i in range(nb):
                ari = ar[bi == i]
                mini, maxi = ari.min(), ari.max()
                if maxi < 1:
                    shapes[i] = [maxi, 1]
                elif mini > 1:
                    shapes[i] = [1, 1 / mini]

            self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(int) * stride

        # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)
        self.imgs, self.img_npy = [None] * n, [None] * n
        if cache_images:
            if cache_images == 'disk':
                self.im_cache_dir = Path(Path(self.img_files[0]).parent.as_posix() + '_npy')
                self.img_npy = [self.im_cache_dir / Path(f).with_suffix('.npy').name for f in self.img_files]
                self.im_cache_dir.mkdir(parents=True, exist_ok=True)
            gb = 0  # bytes of cached images (reported in GB below)
            self.img_hw0, self.img_hw = [None] * n, [None] * n
            results = ThreadPool(NUM_THREADS).imap(lambda x: load_image(*x), zip(repeat(self), range(n)))
            pbar = tqdm(enumerate(results), total=n)
            for i, x in pbar:
                if cache_images == 'disk':
                    if not self.img_npy[i].exists():
                        np.save(self.img_npy[i].as_posix(), x[0])
                    gb += self.img_npy[i].stat().st_size
                else:
                    self.imgs[i], self.img_hw0[i], self.img_hw[i] = x  # im, hw_orig, hw_resized = load_image(self, i)
                    gb += self.imgs[i].nbytes
                pbar.desc = f'{prefix}Caching images ({gb / 1E9:.1f}GB {cache_images})'
            pbar.close()

    def cache_labels(self, path=Path('./labels.cache'), prefix=''):
        """Cache dataset labels, check images and read shapes.

        Runs `verify_image_label` over every (image, label) pair in a process
        pool, then tries to save the resulting dict to `path`.

        :return: dict mapping image file -> [labels, shape, segments], plus the
                 'hash', 'results', 'msgs' and 'version' entries.
        """
        x = {}  # dict
        nm, nf, ne, nc, msgs = 0, 0, 0, 0, []  # number missing, found, empty, corrupt, messages
        desc = f"{prefix}Scanning '{path.parent / path.stem}' images and labels..."
        with Pool(NUM_THREADS) as pool:
            pbar = tqdm(pool.imap(verify_image_label, zip(self.img_files, self.label_files, repeat(prefix))),
                        desc=desc, total=len(self.img_files))
            for im_file, lb, shape, segments, nm_f, nf_f, ne_f, nc_f, msg in pbar:
                nm += nm_f
                nf += nf_f
                ne += ne_f
                nc += nc_f
                if im_file:
                    x[im_file] = [lb, shape, segments]
                if msg:
                    msgs.append(msg)
                pbar.desc = f"{desc}{nf} found, {nm} missing, {ne} empty, {nc} corrupted"

        pbar.close()
        if msgs:
            LOGGER.info('\n'.join(msgs))
        if nf == 0:
            LOGGER.warning(f'{prefix}WARNING: No labels found in {path}. See {HELP_URL}')
        x['hash'] = get_hash(self.label_files + self.img_files)
        x['results'] = nf, nm, ne, nc, len(self.img_files)
        x['msgs'] = msgs  # warnings
        x['version'] = self.cache_version  # cache version
        try:
            np.save(path, x)  # save cache for next time
            path.with_suffix('.cache.npy').rename(path)  # remove .npy suffix
            LOGGER.info(f'{prefix}New cache created: {path}')
        except Exception as e:
            LOGGER.warning(f'{prefix}WARNING: Cache directory {path.parent} is not writeable: {e}')  # not writeable
        return x

    def __len__(self):
        return len(self.img_files)

    def __getitem__(self, index):
        """Load, augment and convert one sample.

        :return: (image tensor CHW RGB, labels_out (nl, 6), image path, shapes);
                 `shapes` is None for mosaic samples.
        """
        index = self.indices[index]  # linear, shuffled, or image_weights

        hyp = self.hyp
        mosaic = self.mosaic and random.random() < hyp['mosaic']
        if mosaic:
            # Load mosaic
            img, labels = load_mosaic(self, index)
            shapes = None

            # MixUp augmentation
            if random.random() < hyp['mixup']:
                img, labels = mixup(img, labels, *load_mosaic(self, random.randint(0, self.n - 1)))

        else:
            # Load image
            img, (h0, w0), (h, w) = load_image(self, index)

            # Letterbox
            shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size  # final letterboxed shape
            img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
            shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling

            labels = self.labels[index].copy()
            if labels.size:  # normalized xywh to pixel xyxy format
                labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1])

            if self.augment:
                img, labels = random_perspective(img, labels,
                                                 degrees=hyp['degrees'],
                                                 translate=hyp['translate'],
                                                 scale=hyp['scale'],
                                                 shear=hyp['shear'],
                                                 perspective=hyp['perspective'])

        nl = len(labels)  # number of labels
        if nl:
            labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1E-3)

        if self.augment:
            # Albumentations
            img, labels = self.albumentations(img, labels)
            nl = len(labels)  # update after albumentations

            # HSV color-space
            augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])

            # Flip up-down
            if random.random() < hyp['flipud']:
                img = np.flipud(img)
                if nl:
                    labels[:, 2] = 1 - labels[:, 2]

            # Flip left-right
            if random.random() < hyp['fliplr']:
                img = np.fliplr(img)
                if nl:
                    labels[:, 1] = 1 - labels[:, 1]

        labels_out = torch.zeros((nl, 6))
        if nl:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # Convert
        img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
        img = np.ascontiguousarray(img)

        return torch.from_numpy(img), labels_out, self.img_files[index], shapes

    @staticmethod
    def collate_fn(batch):
        """Stack images and concatenate labels, writing each sample's batch position into label column 0."""
        img, label, path, shapes = zip(*batch)  # transposed
        for i, lb in enumerate(label):
            lb[:, 0] = i  # add target image index for build_targets()
        return torch.stack(img, 0), torch.cat(label, 0), path, shapes

    @staticmethod
    def collate_fn4(batch):
        """Collate groups of 4 samples into one: either upscale the first image 2x or tile all four 2x2."""
        img, label, path, shapes = zip(*batch)  # transposed
        n = len(shapes) // 4
        img4, label4, path4, shapes4 = [], [], path[:n], shapes[:n]

        ho = torch.tensor([[0.0, 0, 0, 1, 0, 0]])  # offset added to label column 3
        wo = torch.tensor([[0.0, 0, 1, 0, 0, 0]])  # offset added to label column 2
        s = torch.tensor([[1, 1, 0.5, 0.5, 0.5, 0.5]])  # scale
        for i in range(n):  # zidane torch.zeros(16,3,720,1280)  # BCHW
            i *= 4
            if random.random() < 0.5:
                im = F.interpolate(img[i].unsqueeze(0).float(), scale_factor=2.0, mode='bilinear', align_corners=False)[
                    0].type(img[i].type())
                lb = label[i]
            else:
                im = torch.cat((torch.cat((img[i], img[i + 1]), 1), torch.cat((img[i + 2], img[i + 3]), 1)), 2)
                lb = torch.cat((label[i], label[i + 1] + ho, label[i + 2] + wo, label[i + 3] + ho + wo), 0) * s
            img4.append(im)
            label4.append(lb)

        for i, lb in enumerate(label4):
            lb[:, 0] = i  # add target image index for build_targets()

        return torch.stack(img4, 0), torch.cat(label4, 0), path4, shapes4
657
658
659
# Ancillary functions --------------------------------------------------------------------------------------------------
660
def load_image(self, i):
    """Load 1 image from dataset index `i`; return (im, original hw, resized hw).

    A RAM-cached image is returned as-is together with its stored sizes.
    Otherwise the image is read from its *.npy file when one exists, else from
    the image file, and resized so that its longer side equals `self.img_size`.
    """
    if self.imgs[i] is not None:  # cached in ram
        return self.imgs[i], self.img_hw0[i], self.img_hw[i]  # im, hw_original, hw_resized

    npy_file = self.img_npy[i]
    if npy_file and npy_file.exists():  # load npy
        im = np.load(npy_file)
    else:  # read image
        path = self.img_files[i]
        im = cv2.imread(path)  # BGR
        assert im is not None, f'Image Not Found {path}'

    h0, w0 = im.shape[:2]  # orig hw
    r = self.img_size / max(h0, w0)  # ratio
    if r != 1:  # if sizes are not equal
        # INTER_AREA only when shrinking without augmentation, INTER_LINEAR otherwise
        downscale_plain = r < 1 and not self.augment
        interpolation = cv2.INTER_AREA if downscale_plain else cv2.INTER_LINEAR
        new_size = (int(w0 * r), int(h0 * r))
        im = cv2.resize(im, new_size, interpolation=interpolation)
    return im, (h0, w0), im.shape[:2]  # im, hw_original, hw_resized
679
680
681
def load_mosaic(self, index):
    """YOLOv5 4-mosaic loader. Tiles the image at 'index' plus 3 randomly drawn images around a random center.

    The tiles are pasted onto a (2s x 2s) canvas filled with 114, where s = self.img_size. Labels and segments are
    shifted into canvas pixel coordinates, clipped to the canvas, and the result is passed through copy_paste() and
    random_perspective(). Returns (img4, labels4).
    """
    s = self.img_size
    side = s * 2  # canvas edge length
    yc, xc = [int(random.uniform(-b, 2 * s + b)) for b in self.mosaic_border]  # mosaic center y, x
    picks = [index] + random.choices(self.indices, k=3)  # 3 additional image indices
    random.shuffle(picks)

    img4 = None
    labels4, segments4 = [], []
    for slot, idx in enumerate(picks):
        # Load image
        img, _, (h, w) = load_image(self, idx)
        if img4 is None:  # first tile: allocate the base image with 4 tiles
            img4 = np.full((side, side, img.shape[2]), 114, dtype=np.uint8)

        # Horizontal placement: slots 0 and 2 end at the center column, slots 1 and 3 start there.
        # '*a' coordinates index the large canvas, '*b' coordinates index the small source image.
        if slot % 2 == 0:  # left column
            x1a, x2a = max(xc - w, 0), xc
            x1b, x2b = w - (x2a - x1a), w
        else:  # right column
            x1a, x2a = xc, min(xc + w, side)
            x1b, x2b = 0, min(w, x2a - x1a)

        # Vertical placement: slots 0 and 1 end at the center row, slots 2 and 3 start there
        if slot < 2:  # top row
            y1a, y2a = max(yc - h, 0), yc
            y1b, y2b = h - (y2a - y1a), h
        else:  # bottom row
            y1a, y2a = yc, min(side, yc + h)
            y1b, y2b = 0, min(y2a - y1a, h)

        img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
        padw, padh = x1a - x1b, y1a - y1b  # offset of the source image origin on the canvas

        # Labels
        labels = self.labels[idx].copy()
        segments = self.segments[idx].copy()
        if labels.size:
            labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh)  # normalized xywh to pixel xyxy format
            segments = [xyn2xy(seg, w, h, padw, padh) for seg in segments]
        labels4.append(labels)
        segments4.extend(segments)

    # Concat/clip labels
    labels4 = np.concatenate(labels4, 0)
    for coords in (labels4[:, 1:], *segments4):
        np.clip(coords, 0, 2 * s, out=coords)  # clip in place when using random_perspective()

    # Augment
    img4, labels4, segments4 = copy_paste(img4, labels4, segments4, p=self.hyp['copy_paste'])
    img4, labels4 = random_perspective(
        img4,
        labels4,
        segments4,
        degrees=self.hyp['degrees'],
        translate=self.hyp['translate'],
        scale=self.hyp['scale'],
        shear=self.hyp['shear'],
        perspective=self.hyp['perspective'],
        border=self.mosaic_border,  # border to remove
    )

    return img4, labels4
736
737
738
def load_mosaic9(self, index):
    """YOLOv5 9-mosaic loader. Tiles the image at 'index' plus 8 randomly drawn images on a (3s x 3s) canvas.

    The first tile goes to the canvas center and the remaining eight are placed around it. A random (2s x 2s)
    window is then cropped out, labels and segments are shifted and clipped to that window, and the result is
    passed through random_perspective(). Returns (img9, labels9). s = self.img_size.
    """
    s = self.img_size
    picks = [index] + random.choices(self.indices, k=8)  # 8 additional image indices
    random.shuffle(picks)

    labels9, segments9 = [], []
    for slot, idx in enumerate(picks):
        # Load image
        img, _, (h, w) = load_image(self, idx)

        # Tile box as (xmin, ymin, xmax, ymax) in canvas coordinates.
        # (h0, w0) is the size of the center tile, (hp, wp) the size of the tile placed in the previous iteration.
        if slot == 0:  # center
            img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8)  # base image with 9 tiles
            h0, w0 = h, w
            box = (s, s, s + w, s + h)
        elif slot == 1:  # top
            box = (s, s - h, s + w, s)
        elif slot == 2:  # top right
            box = (s + wp, s - h, s + wp + w, s)
        elif slot == 3:  # right
            box = (s + w0, s, s + w0 + w, s + h)
        elif slot == 4:  # bottom right
            box = (s + w0, s + hp, s + w0 + w, s + hp + h)
        elif slot == 5:  # bottom
            box = (s + w0 - w, s + h0, s + w0, s + h0 + h)
        elif slot == 6:  # bottom left
            box = (s + w0 - wp - w, s + h0, s + w0 - wp, s + h0 + h)
        elif slot == 7:  # left
            box = (s - w, s + h0 - h, s, s + h0)
        elif slot == 8:  # top left
            box = (s - w, s + h0 - hp - h, s, s + h0 - hp)

        padx, pady = box[:2]  # unclamped top-left corner, used as the label offset
        x1, y1, x2, y2 = [max(v, 0) for v in box]  # allocate coords, clamped at the canvas origin

        # Labels
        labels = self.labels[idx].copy()
        segments = self.segments[idx].copy()
        if labels.size:
            labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padx, pady)  # normalized xywh to pixel xyxy format
            segments = [xyn2xy(seg, w, h, padx, pady) for seg in segments]
        labels9.append(labels)
        segments9.extend(segments)

        # Image
        img9[y1:y2, x1:x2] = img[y1 - pady:, x1 - padx:]  # img9[ymin:ymax, xmin:xmax]
        hp, wp = h, w  # height, width previous

    # Offset: top-left corner of the (2s x 2s) crop window
    yc, xc = [int(random.uniform(0, s)) for _ in self.mosaic_border]
    img9 = img9[yc:yc + 2 * s, xc:xc + 2 * s]

    # Concat/shift labels into crop-window coordinates
    labels9 = np.concatenate(labels9, 0)
    labels9[:, [1, 3]] -= xc
    labels9[:, [2, 4]] -= yc
    shift = np.array([xc, yc])
    segments9 = [seg - shift for seg in segments9]

    for coords in (labels9[:, 1:], *segments9):
        np.clip(coords, 0, 2 * s, out=coords)  # clip in place when using random_perspective()

    # Augment
    img9, labels9 = random_perspective(
        img9,
        labels9,
        segments9,
        degrees=self.hyp['degrees'],
        translate=self.hyp['translate'],
        scale=self.hyp['scale'],
        shear=self.hyp['shear'],
        perspective=self.hyp['perspective'],
        border=self.mosaic_border,  # border to remove
    )

    return img9, labels9
810
811
812
def create_folder(path='./new'):
    """Create a fresh, empty folder at 'path'; an existing folder at that location is deleted first."""
    stale = os.path.exists(path)
    if stale:
        shutil.rmtree(path)  # delete the previous output folder and everything in it
    os.makedirs(path)  # make new output folder (including missing parents)
817
818
819
def flatten_recursive(path='../datasets/coco128'):
    """Flatten a recursive directory by copying all of its files to the top level of a new '<path>_flat' directory.

    Arguments
        path:  Directory to flatten (str or pathlib.Path)

    The output directory is recreated from scratch by create_folder(). Files are copied under their bare file name,
    so files that share a name in different sub-directories overwrite one another.
    """
    new_path = Path(str(path) + '_flat')  # str() so that a pathlib.Path argument works as well as a string
    create_folder(new_path)
    matches = glob.glob(str(Path(path)) + '/**/*.*', recursive=True)
    files = [f for f in matches if os.path.isfile(f)]  # '*.*' also matches directories with a dot in their name
    for file in tqdm(files):
        shutil.copyfile(file, new_path / Path(file).name)
825
826
827
def extract_boxes(path='../datasets/coco128'):  # from utils.datasets import *; extract_boxes()
    """Convert detection dataset into classification dataset, with one directory per class.

    Arguments
        path:  Dataset directory that is searched recursively for images

    For every image with a label file, each labelled box is enlarged (width and height * 1.2 + 3 pixels), clipped
    to the image and saved as '<path>/classifier/<class>/<path.stem>_<image stem>_<row>.jpg'. An existing
    '<path>/classifier' directory is removed first. Raises AssertionError if an image cannot be read or a crop
    cannot be written.
    """
    path = Path(path)  # images dir
    out_dir = path / 'classifier'
    if out_dir.is_dir():
        shutil.rmtree(out_dir)  # remove existing
    files = list(path.rglob('*.*'))
    for im_file in tqdm(files, total=len(files)):
        if im_file.suffix[1:].lower() not in IMG_FORMATS:  # case-insensitive, as in autosplit()
            continue

        # labels
        lb_file = Path(img2label_paths([str(im_file)])[0])
        if not lb_file.exists():
            continue
        with open(lb_file) as f:
            lb = np.array([x.split() for x in f.read().strip().splitlines()], dtype=np.float32)  # labels

        # image: kept in OpenCV's native BGR order because cv2.imwrite() below expects BGR input
        im = cv2.imread(str(im_file))
        assert im is not None, f'Image Not Found {im_file}'
        h, w = im.shape[:2]

        for j, x in enumerate(lb):
            c = int(x[0])  # class
            crop_file = out_dir / f'{c}' / f'{path.stem}_{im_file.stem}_{j}.jpg'  # new filename
            crop_file.parent.mkdir(parents=True, exist_ok=True)

            b = x[1:] * [w, h, w, h]  # box, normalized xywh to pixels
            b[2:] = b[2:] * 1.2 + 3  # pad
            b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(int)  # builtin int: the np.int alias no longer exists

            b[[0, 2]] = np.clip(b[[0, 2]], 0, w)  # clip boxes outside of image
            b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
            assert cv2.imwrite(str(crop_file), im[b[1]:b[3], b[0]:b[2]]), f'box failure in {crop_file}'
859
860
861
def autosplit(path='../datasets/coco128/images', weights=(0.9, 0.1, 0.0), annotated_only=False):
    """ Autosplit a dataset into train/val/test splits and save path/autosplit_*.txt files
    Usage: from utils.datasets import *; autosplit()
    Arguments
        path:            Path to images directory
        weights:         Train, val, test weights (list, tuple)
        annotated_only:  Only use images with an annotated txt file

    The three autosplit_*.txt files are written to the parent of the images directory, one './'-prefixed relative
    image path per line. The global random module is seeded with 0 so the assignment is reproducible.
    """
    path = Path(path)  # images dir
    out_dir = path.parent  # split files live next to the images directory
    image_files = sorted(p for p in path.rglob('*.*') if p.suffix[1:].lower() in IMG_FORMATS)  # image files only
    total = len(image_files)
    random.seed(0)  # for reproducibility
    split_ids = random.choices([0, 1, 2], weights=weights, k=total)  # assign each image to a split

    split_names = ['autosplit_train.txt', 'autosplit_val.txt', 'autosplit_test.txt']  # 3 txt files
    for name in split_names:
        (out_dir / name).unlink(missing_ok=True)  # remove existing

    print(f'Autosplitting images from {path}' + ', using *.txt labeled images only' * annotated_only)
    for split_id, image in tqdm(zip(split_ids, image_files), total=total):
        if annotated_only and not Path(img2label_paths([str(image)])[0]).exists():  # check label
            continue
        with open(out_dir / split_names[split_id], 'a') as f:
            f.write('./' + image.relative_to(out_dir).as_posix() + '\n')  # add image to txt file
883
884
885
def verify_image_label(args):
    """Verify one image-label pair.

    Arguments
        args:  (im_file, lb_file, prefix) tuple: image path, label path and a prefix for warning messages

    Returns
        On success the tuple (im_file, labels, shape, segments, nm, nf, ne, nc, msg), where labels is a float32
        array with 5 columns (class, x, y, w, h), shape is the result of exif_size(), segments is a list of (n, 2)
        arrays (empty for box labels) and nm/nf/ne/nc flag a missing/found/empty/corrupt label with 0 or 1.
        If anything raises, the list [None, None, None, None, nm, nf, ne, 1, msg] is returned instead.
    """
    im_file, lb_file, prefix = args
    nm, nf, ne, nc, msg, segments = 0, 0, 0, 0, '', []  # number (missing, found, empty, corrupt), message, segments
    try:
        # verify images
        im = Image.open(im_file)
        im.verify()  # PIL verify
        shape = exif_size(im)  # image size
        assert (shape[0] > 9) & (shape[1] > 9), f'image size {shape} <10 pixels'
        assert im.format.lower() in IMG_FORMATS, f'invalid image format {im.format}'
        if im.format.lower() in ('jpg', 'jpeg'):
            with open(im_file, 'rb') as f:
                f.seek(-2, 2)  # last two bytes of the file
                if f.read() != b'\xff\xd9':  # corrupt JPEG
                    ImageOps.exif_transpose(Image.open(im_file)).save(im_file, 'JPEG', subsampling=0, quality=100)
                    msg = f'{prefix}WARNING: {im_file}: corrupt JPEG restored and saved'

        # verify labels
        if os.path.isfile(lb_file):
            nf = 1  # label found
            with open(lb_file) as f:
                lb = [x.split() for x in f.read().strip().splitlines() if len(x)]
                if any(len(x) > 8 for x in lb):  # is segment
                    classes = np.array([x[0] for x in lb], dtype=np.float32)
                    segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in lb]  # (cls, xy1...)
                    lb = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments)), 1)  # (cls, xywh)
                lb = np.array(lb, dtype=np.float32)
            nl = len(lb)
            if nl:
                assert lb.shape[1] == 5, f'labels require 5 columns, {lb.shape[1]} columns detected'
                assert (lb >= 0).all(), f'negative label values {lb[lb < 0]}'
                assert (lb[:, 1:] <= 1).all(), \
                    f'non-normalized or out of bounds coordinates {lb[:, 1:][lb[:, 1:] > 1]}'
                _, i = np.unique(lb, axis=0, return_index=True)
                if len(i) < nl:  # duplicate row check
                    lb = lb[i]  # remove duplicates
                    if segments:
                        # segments is a Python list, so it cannot be indexed with the index array directly
                        segments = [segments[x] for x in i]
                    msg = f'{prefix}WARNING: {im_file}: {nl - len(i)} duplicate labels removed'
            else:
                ne = 1  # label empty
                lb = np.zeros((0, 5), dtype=np.float32)
        else:
            nm = 1  # label missing
            lb = np.zeros((0, 5), dtype=np.float32)
        return im_file, lb, shape, segments, nm, nf, ne, nc, msg
    except Exception as e:
        nc = 1
        msg = f'{prefix}WARNING: {im_file}: ignoring corrupt image/label: {e}'
        return [None, None, None, None, nm, nf, ne, nc, msg]
935
936
937
def dataset_stats(path='coco128.yaml', autodownload=False, verbose=False, profile=False, hub=False):
    """ Return dataset statistics dictionary with images and instances counts per split per class
    To run in parent directory: export PYTHONPATH="$PWD/yolov5"
    Usage1: from utils.datasets import *; dataset_stats('coco128.yaml', autodownload=True)
    Usage2: from utils.datasets import *; dataset_stats('../datasets/coco128_with_yaml.zip')
    Arguments
        path:           Path to data.yaml or data.zip (with data.yaml inside data.zip)
        autodownload:   Attempt to download dataset if not found locally
        verbose:        Print stats dictionary
        profile:        Time saving and loading the statistics as stats.npy and stats.json in the output directory
        hub:            Save reduced-size JPEG copies of all images to '<data path>-hub/images' and write
                        '<data path>-hub/stats.json'
    """

    def round_labels(labels):
        # Update labels to integer class and 4 decimal place floats
        return [[int(c), *(round(x, 4) for x in points)] for c, *points in labels]

    def unzip(path):
        # Unzip data.zip TODO: CONSTRAINT: path/to/abc.zip MUST unzip to 'path/to/abc/'
        if str(path).endswith('.zip'):  # path is data.zip
            assert Path(path).is_file(), f'Error unzipping {path}, file not found'
            with ZipFile(path) as zf:  # context manager closes the archive after extraction
                zf.extractall(path=path.parent)  # unzip
            unzip_dir = path.with_suffix('')  # dataset directory == zip name
            return True, str(unzip_dir), next(unzip_dir.rglob('*.yaml'))  # zipped, data_dir, yaml_path
        else:  # path is data.yaml
            return False, None, path

    def hub_ops(f, max_dim=1920):
        # HUB ops for 1 image 'f': resize and save at reduced quality in /dataset-hub for web/app viewing
        f_new = im_dir / Path(f).name  # dataset-hub image filename (im_dir is set by the split loop below)
        try:  # use PIL
            im = Image.open(f)
            r = max_dim / max(im.height, im.width)  # ratio
            if r < 1.0:  # image too large
                im = im.resize((int(im.width * r), int(im.height * r)))
            im.save(f_new, 'JPEG', quality=75, optimize=True)  # save
        except Exception as e:  # use OpenCV
            print(f'WARNING: HUB ops PIL failure {f}: {e}')
            im = cv2.imread(f)
            im_height, im_width = im.shape[:2]
            r = max_dim / max(im_height, im_width)  # ratio
            if r < 1.0:  # image too large
                im = cv2.resize(im, (int(im_width * r), int(im_height * r)), interpolation=cv2.INTER_AREA)
            cv2.imwrite(str(f_new), im)

    zipped, data_dir, yaml_path = unzip(Path(path))
    with open(check_yaml(yaml_path), errors='ignore') as f:
        data = yaml.safe_load(f)  # data dict
        if zipped:
            data['path'] = data_dir  # TODO: should this be dir.resolve()?
    check_dataset(data, autodownload)  # download dataset if missing
    hub_dir = Path(data['path'] + ('-hub' if hub else ''))
    stats = {'nc': data['nc'], 'names': data['names']}  # statistics dictionary
    for split in 'train', 'val', 'test':
        if data.get(split) is None:
            stats[split] = None  # i.e. no test set
            continue
        dataset = LoadImagesAndLabels(data[split])  # load dataset
        # One row of per-class instance counts per image
        x = np.array([np.bincount(label[:, 0].astype(int), minlength=data['nc'])
                      for label in tqdm(dataset.labels, total=dataset.n, desc='Statistics')])  # shape(128x80)
        stats[split] = {'instance_stats': {'total': int(x.sum()), 'per_class': x.sum(0).tolist()},
                        'image_stats': {'total': dataset.n, 'unlabelled': int(np.all(x == 0, 1).sum()),
                                        'per_class': (x > 0).sum(0).tolist()},
                        'labels': [{str(Path(k).name): round_labels(v.tolist())} for k, v in
                                   zip(dataset.img_files, dataset.labels)]}

        if hub:
            im_dir = hub_dir / 'images'
            im_dir.mkdir(parents=True, exist_ok=True)
            with ThreadPool(NUM_THREADS) as pool:  # pool is shut down once every image has been processed
                for _ in tqdm(pool.imap(hub_ops, dataset.img_files), total=dataset.n, desc='HUB Ops'):
                    pass

    # Profile
    stats_path = hub_dir / 'stats.json'
    if profile:
        file = stats_path.with_suffix('.npy')
        t1 = time.time()
        np.save(file, stats)
        t2 = time.time()
        np.load(file, allow_pickle=True)  # result discarded, only the read time matters
        print(f'stats.npy times: {time.time() - t2:.3f}s read, {t2 - t1:.3f}s write')

        file = stats_path.with_suffix('.json')
        t1 = time.time()
        with open(file, 'w') as f:
            json.dump(stats, f)  # save stats *.json
        t2 = time.time()
        with open(file) as f:
            json.load(f)  # result discarded, only the read time matters
        print(f'stats.json times: {time.time() - t2:.3f}s read, {t2 - t1:.3f}s write')

    # Save, print and return
    if hub:
        print(f'Saving {stats_path.resolve()}...')
        with open(stats_path, 'w') as f:
            json.dump(stats, f)  # save stats.json
    if verbose:
        print(json.dumps(stats, indent=2, sort_keys=False))
    return stats