Diff of /train.py [000000] .. [190ca4]

Switch to side-by-side view

--- a
+++ b/train.py
@@ -0,0 +1,858 @@
+# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
+"""
+Train a YOLOv5 model on a custom dataset.
+Models and datasets download automatically from the latest YOLOv5 release.
+
+Usage - Single-GPU training:
+    $ python train.py --data coco128.yaml --weights yolov5s.pt --img 640  # from pretrained (recommended)
+    $ python train.py --data coco128.yaml --weights '' --cfg yolov5s.yaml --img 640  # from scratch
+
+Usage - Multi-GPU DDP training:
+    $ python -m torch.distributed.run --nproc_per_node 4 --master_port 1 train.py --data coco128.yaml --weights yolov5s.pt --img 640 --device 0,1,2,3
+
+Models:     https://github.com/ultralytics/yolov5/tree/master/models
+Datasets:   https://github.com/ultralytics/yolov5/tree/master/data
+Tutorial:   https://docs.ultralytics.com/yolov5/tutorials/train_custom_data
+"""
+
+import argparse
+import math
+import os
+import random
+import subprocess
+import sys
+import time
+from copy import deepcopy
+from datetime import datetime, timedelta
+from pathlib import Path
+import torch.nn.functional as F
+from utils.general import xywh2xyxy,get_fixed_xyxy
+#from utils import custom_classifierCustomClassifier, train_and_evaluate, evaluate_classifier
+
+
+try:
+    import comet_ml  # must be imported before torch (if installed)
+except ImportError:
+    comet_ml = None
+
+import numpy as np
+import torch
+import torch.distributed as dist
+import torch.nn as nn
+import yaml
+from torch.optim import lr_scheduler
+from tqdm import tqdm
+from torchvision.ops import roi_align
+from utils.general import get_object_level_feature_maps
+
+# Resolve this script's location and make its directory importable. This has to run before the
+# project-local imports that follow (val, models.*, utils.*), which rely on ROOT being on sys.path.
+FILE = Path(__file__).resolve()
+ROOT = FILE.parents[0]  # YOLOv5 root directory
+if str(ROOT) not in sys.path:
+    sys.path.append(str(ROOT))  # add ROOT to PATH
+ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative (re-expressed against the current working directory)
+
+import val as validate  # for end-of-epoch mAP
+from models.experimental import attempt_load
+from models.yolo import Model
+from utils.autoanchor import check_anchors
+from utils.autobatch import check_train_batch_size
+from utils.callbacks import Callbacks
+from utils.dataloaders import create_dataloader
+from utils.downloads import attempt_download, is_url
+from utils.general import (LOGGER, TQDM_BAR_FORMAT, check_amp, check_dataset, check_file, check_git_info,
+                           check_git_status, check_img_size, check_requirements, check_suffix, check_yaml, colorstr,
+                           get_latest_run, increment_path, init_seeds, intersect_dicts, labels_to_class_weights,
+                           labels_to_image_weights, methods, one_cycle, print_args, print_mutation, strip_optimizer,
+                           yaml_save,plot_multi_channel_feature_map_with_boxes,xywh_to_xyxy)
+from utils.loggers import LOGGERS, Loggers
+from utils.loggers.comet.comet_utils import check_comet_resume
+from utils.loss import ComputeLoss
+from utils.metrics import fitness
+from utils.plots import plot_evolve
+from utils.custom_classifier import CustomClassifier, train_model_once
+from utils.my_model import MyCNN,cell_training
+from utils.torch_utils import (EarlyStopping, ModelEMA, de_parallel, select_device, smart_DDP, smart_optimizer,
+                               smart_resume, torch_distributed_zero_first)
+
+LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1))  # https://pytorch.org/docs/stable/elastic/run.html
+RANK = int(os.getenv('RANK', -1))
+WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1))
+GIT_INFO = check_git_info()
+
+
+
+def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictionary
+    save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze = \
+        Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \
+        opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze
+    callbacks.run('on_pretrain_routine_start')
+
+
+    cell_attribute_model = MyCNN(num_classes=12, dropout_prob=0.5, in_channels=480).to(device)
+    # cell_attribute_model.load_state_dict(torch.load('Attribute_model/best_weights_0.8056662588308221_51.pth'))
+    #cell_attribute_model.train() 
+    
+    #step_size = 5
+   # gamma = 0.01
+   # scheduler_cell_model = lr_scheduler.StepLR(optimizer_cell_model, step_size=step_size, gamma=gamma)
+
+    # Directories
+    w = save_dir / 'weights'  # weights dir
+    (w.parent if evolve else w).mkdir(parents=True, exist_ok=True)  # make dir
+    last, best = w / 'last.pt', w / 'best.pt'
+
+    # Hyperparameters
+    if isinstance(hyp, str):
+        with open(hyp, errors='ignore') as f:
+            hyp = yaml.safe_load(f)  # load hyps dict
+    LOGGER.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in hyp.items()))
+    opt.hyp = hyp.copy()  # for saving hyps to checkpoints
+
+    # Save run settings
+    if not evolve:
+        yaml_save(save_dir / 'hyp.yaml', hyp)
+        yaml_save(save_dir / 'opt.yaml', vars(opt))
+
+    # Loggers
+    data_dict = None
+    if RANK in {-1, 0}:
+        include_loggers = list(LOGGERS)
+        if getattr(opt, 'ndjson_console', False):
+            include_loggers.append('ndjson_console')
+        if getattr(opt, 'ndjson_file', False):
+            include_loggers.append('ndjson_file')
+
+        loggers = Loggers(
+            save_dir=save_dir,
+            weights=weights,
+            opt=opt,
+            hyp=hyp,
+            logger=LOGGER,
+            include=tuple(include_loggers),
+        )
+
+        # Register actions
+        for k in methods(loggers):
+            callbacks.register_action(k, callback=getattr(loggers, k))
+
+        # Process custom dataset artifact link
+        data_dict = loggers.remote_dataset
+        if resume:  # If resuming runs from remote artifact
+            weights, epochs, hyp, batch_size = opt.weights, opt.epochs, opt.hyp, opt.batch_size
+
+    # Config
+    plots = not evolve and not opt.noplots  # create plots
+    cuda = device.type != 'cpu'
+    init_seeds(opt.seed + 1 + RANK, deterministic=True)
+    with torch_distributed_zero_first(LOCAL_RANK):
+        data_dict = data_dict or check_dataset(data)  # check if None
+    train_path, val_path = data_dict['train'], data_dict['val']
+    nc = 1 if single_cls else int(data_dict['nc'])  # number of classes
+    names = {0: 'item'} if single_cls and len(data_dict['names']) != 1 else data_dict['names']  # class names
+    is_coco = isinstance(val_path, str) and val_path.endswith('coco/val2017.txt')  # COCO dataset
+
+    # Model
+    check_suffix(weights, '.pt')  # check weights
+    pretrained = weights.endswith('.pt')
+    if pretrained:
+        with torch_distributed_zero_first(LOCAL_RANK):
+            weights = attempt_download(weights)  # download if not found locally
+        ckpt = torch.load(weights, map_location='cpu')  # load checkpoint to CPU to avoid CUDA memory leak
+        model = Model(cfg or ckpt['model'].yaml, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device)  # create
+        exclude = ['anchor'] if (cfg or hyp.get('anchors')) and not resume else []  # exclude keys
+        csd = ckpt['model'].float().state_dict()  # checkpoint state_dict as FP32
+        csd = intersect_dicts(csd, model.state_dict(), exclude=exclude)  # intersect
+        model.load_state_dict(csd, strict=False)  # load
+        LOGGER.info(f'Transferred {len(csd)}/{len(model.state_dict())} items from {weights}')  # report
+    else:
+        model = Model(cfg, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device)  # create
+    amp = check_amp(model)  # check AMP
+
+    # Freeze
+    freeze = [f'model.{x}.' for x in (freeze if len(freeze) > 1 else range(freeze[0]))]  # layers to freeze
+    for k, v in model.named_parameters():
+        v.requires_grad = True  # train all layers
+        # v.register_hook(lambda x: torch.nan_to_num(x))  # NaN to 0 (commented for erratic training results)
+        if any(x in k for x in freeze):
+            LOGGER.info(f'freezing {k}')
+            v.requires_grad = False
+
+    # Image size
+    gs = max(int(model.stride.max()), 32)  # grid size (max stride)
+    imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2)  # verify imgsz is gs-multiple
+
+    # Batch size
+    if RANK == -1 and batch_size == -1:  # single-GPU only, estimate best batch size
+        batch_size = check_train_batch_size(model, imgsz, amp)
+        loggers.on_params_update({'batch_size': batch_size})
+
+    # Optimizer
+    nbs = 64  # nominal batch size
+    accumulate = max(round(nbs / batch_size), 1)  # accumulate loss before optimizing
+    hyp['weight_decay'] *= batch_size * accumulate / nbs  # scale weight_decay
+    optimizer = smart_optimizer(model, opt.optimizer, hyp['lr0'], hyp['momentum'], hyp['weight_decay'])
+    #optimizer_cell_model = torch.optim.Adam(cell_attribute_model.parameters(), opt.optimizer, hyp['lr0'], hyp['momentum'], hyp['weight_decay'])
+    optimizer_cell_model = torch.optim.SGD(cell_attribute_model.parameters(), lr=hyp['lr0'],momentum= hyp['momentum'], weight_decay=hyp['weight_decay'])
+
+    # Scheduler
+    if opt.cos_lr:
+        lf = one_cycle(1, hyp['lrf'], epochs)  # cosine 1->hyp['lrf']
+    else:
+        lf = lambda x: (1 - x / epochs) * (1.0 - hyp['lrf']) + hyp['lrf']  # linear
+    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
+    scheduler_cell_model = lr_scheduler.LambdaLR(optimizer_cell_model, lr_lambda=lf)  # plot_lr_scheduler(optimizer, scheduler, epochs)
+
+    
+
+    # EMA
+    ema = ModelEMA(model) if RANK in {-1, 0} else None
+
+    # Resume
+    best_fitness, start_epoch = 0.0, 0
+    if pretrained:
+        if resume:
+            best_fitness, start_epoch, epochs = smart_resume(ckpt, optimizer, ema, weights, epochs, resume)
+        del ckpt, csd
+
+    # DP mode
+    if cuda and RANK == -1 and torch.cuda.device_count() > 1:
+        LOGGER.warning(
+            'WARNING ⚠️ DP not recommended, use torch.distributed.run for best DDP Multi-GPU results.\n'
+            'See Multi-GPU Tutorial at https://docs.ultralytics.com/yolov5/tutorials/multi_gpu_training to get started.'
+        )
+        model = torch.nn.DataParallel(model)
+
+    # SyncBatchNorm
+    if opt.sync_bn and cuda and RANK != -1:
+        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
+        LOGGER.info('Using SyncBatchNorm()')
+
+    # Trainloader
+    train_loader, dataset = create_dataloader(train_path,
+                                              imgsz,
+                                              batch_size // WORLD_SIZE,
+                                              gs,
+                                              single_cls,
+                                              hyp=hyp,
+                                              augment=True,
+                                              cache=None if opt.cache == 'val' else opt.cache,
+                                              rect=opt.rect,
+                                              rank=LOCAL_RANK,
+                                              workers=workers,
+                                              image_weights=opt.image_weights,
+                                              quad=opt.quad,
+                                              prefix=colorstr('train: '),
+                                              shuffle=True,
+                                              seed=opt.seed)
+    labels = np.concatenate(dataset.labels, 0)
+    mlc = int(labels[:, 0].max())  # max label class
+    assert mlc < nc, f'Label class {mlc} exceeds nc={nc} in {data}. Possible class labels are 0-{nc - 1}'
+
+    # Process 0
+    if RANK in {-1, 0}:
+        val_loader = create_dataloader(val_path,
+                                       imgsz,
+                                       batch_size // WORLD_SIZE * 2,
+                                       gs,
+                                       single_cls,
+                                       hyp=hyp,
+                                       cache=None if noval else opt.cache,
+                                       rect=True,
+                                       rank=-1,
+                                       workers=workers * 2,
+                                       pad=0.5,
+                                       prefix=colorstr('val: '))[0]
+
+        if not resume:
+            if not opt.noautoanchor:
+                check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz)  # run AutoAnchor
+            model.half().float()  # pre-reduce anchor precision
+
+        callbacks.run('on_pretrain_routine_end', labels, names)
+
+    # DDP mode
+    if cuda and RANK != -1:
+        model = smart_DDP(model)
+
+    # Model attributes
+    nl = de_parallel(model).model[-1].nl  # number of detection layers (to scale hyps)
+    hyp['box'] *= 3 / nl  # scale to layers
+    hyp['cls'] *= nc / 80 * 3 / nl  # scale to classes and layers
+    hyp['obj'] *= (imgsz / 640) ** 2 * 3 / nl  # scale to image size and layers
+    hyp['label_smoothing'] = opt.label_smoothing
+    model.nc = nc  # attach number of classes to model
+    model.hyp = hyp  # attach hyperparameters to model
+    model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc  # attach class weights
+    model.names = names
+
+    # Start training
+    t0 = time.time()
+    nb = len(train_loader)  # number of batches
+    nw = max(round(hyp['warmup_epochs'] * nb), 100)  # number of warmup iterations, max(3 epochs, 100 iterations)
+    # nw = min(nw, (epochs - start_epoch) / 2 * nb)  # limit warmup to < 1/2 of training
+    last_opt_step = -1
+    maps = np.zeros(nc)  # mAP per class
+    results = (0, 0, 0, 0, 0, 0, 0)  # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls)
+    scheduler.last_epoch = start_epoch - 1  # do not move
+    scaler = torch.cuda.amp.GradScaler(enabled=amp)
+    stopper, stop = EarlyStopping(patience=opt.patience), False
+    compute_loss = ComputeLoss(model)  # init loss class
+    callbacks.run('on_train_start')
+    LOGGER.info(f'Image sizes {imgsz} train, {imgsz} val\n'
+                f'Using {train_loader.num_workers * WORLD_SIZE} dataloader workers\n'
+                f"Logging results to {colorstr('bold', save_dir)}\n"
+                f'Starting training for {epochs} epochs...')
+    for epoch in range(start_epoch, epochs):  # epoch ------------------------------------------------------------------
+        callbacks.run('on_train_epoch_start')
+        model.train()
+        cell_attribute_model.train() 
+
+        # Update image weights (optional, single-GPU only)
+        if opt.image_weights:
+            cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc  # class weights
+            iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw)  # image weights
+            dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n)  # rand weighted idx
+
+        # Update mosaic border (optional)
+        # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs)
+        # dataset.mosaic_border = [b - imgsz, -b]  # height, width borders
+
+        mloss = torch.zeros(3, device=device)  # mean losses
+        if RANK != -1:
+            train_loader.sampler.set_epoch(epoch)
+        pbar = enumerate(train_loader)
+        LOGGER.info(('\n' + '%11s' * 8) % ('Epoch', 'GPU_mem', 'box_loss', 'obj_loss', 'cls_loss', 'attr_loss', 'Instances', 'Size'))
+        if RANK in {-1, 0}:
+            pbar = tqdm(pbar, total=nb, bar_format=TQDM_BAR_FORMAT)  # progress bar
+        optimizer.zero_grad()
+        avg_attribute_loss= 0
+        length_of_data=0
+        for i, (imgs, targets, paths, _) in pbar:  # batch -------------------------------------------------------------
+            
+            callbacks.run('on_train_batch_start')
+            ni = i + nb * epoch  # number integrated batches (since train start)
+            imgs = imgs.to(device, non_blocking=True).float() / 255  # uint8 to float32, 0-255 to 0.0-1.0
+
+            # Warmup
+            if ni <= nw:
+                xi = [0, nw]  # x interp
+                # compute_loss.gr = np.interp(ni, xi, [0.0, 1.0])  # iou loss ratio (obj_loss = 1.0 or iou)
+                accumulate = max(1, np.interp(ni, xi, [1, nbs / batch_size]).round())
+                for j, x in enumerate(optimizer.param_groups):
+                    # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
+                    x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 0 else 0.0, x['initial_lr'] * lf(epoch)])
+                    if 'momentum' in x:
+                        x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']])
+
+            # Multi-scale
+            if opt.multi_scale:
+                sz = random.randrange(int(imgsz * 0.5), int(imgsz * 1.5) + gs) // gs * gs  # size
+                sf = sz / max(imgs.shape[2:])  # scale factor
+                if sf != 1:
+                    ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]]  # new shape (stretched to gs-multiple)
+                    imgs = nn.functional.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)
+
+            # Forward
+            with torch.cuda.amp.autocast(amp):
+                pred,int_feat = model(imgs)  # forward
+                
+                #batch_obj = 0
+                Num_targets = len(targets)
+                pooled_feature_map_batch = []
+                optimizer_cell_model.zero_grad()  
+
+                for i in range(Num_targets):
+                    img_num = int(targets[i,0].item())
+
+                    p2_feature_map =int_feat[0][img_num] # imgs[img_num] 
+                    p3_feature_map = int_feat[1][img_num]
+
+                    x_center = targets[i, 2]
+                    y_center = targets[i, 3]
+                    width = targets[i, 4]
+                    height = targets[i, 5]
+                    bb = [round(x_center.item(),4), round(y_center.item(),4), round(width.item(),4), round(height.item(),4)]
+                    p2_feature_shape_tensor = torch.tensor([int_feat[0][img_num].shape[1], int_feat[0][img_num].shape[2],int_feat[0][img_num].shape[1],int_feat[0][img_num].shape[2]])                        # reduce_channels_layer = torch.nn.Conv2d(1280, 250, kernel_size=1).to(device)
+                    p3_feature_shape_tensor = torch.tensor([int_feat[1][img_num].shape[1], int_feat[1][img_num].shape[2],int_feat[1][img_num].shape[1],int_feat[1][img_num].shape[2]])                        # reduce_channels_layer = torch.nn.Conv2d(1280, 250, kernel_size=1).to(device)
+                        # reduce_channels_layer = torch.nn.Conv2d(1280, 250, kernel_size=1).to(device)
+
+                    p2_normalized_xyxy = xywh_to_xyxy(bb)*p2_feature_shape_tensor #imgs.shape[2]
+                    p3_normalized_xyxy = xywh_to_xyxy(bb)*p3_feature_shape_tensor #imgs.shape[2]
+
+
+                    p2_x_min, p2_y_min, p2_x_max, p2_y_max = get_fixed_xyxy(p2_normalized_xyxy,p2_feature_map)
+                    p3_x_min, p3_y_min, p3_x_max, p3_y_max = get_fixed_xyxy(p3_normalized_xyxy,p3_feature_map)
+    
+                    batch_index = torch.tensor([0], dtype=torch.float32).to(device)
+
+                    p2_roi = torch.tensor([p2_x_min, p2_y_min, p2_x_max, p2_y_max], device=device).float() 
+                    p3_roi = torch.tensor([p3_x_min, p3_y_min, p3_x_max, p3_y_max], device=device).float() 
+
+
+                    # Concatenate the batch index to the bounding box coordinates
+                    p2_roi_with_batch_index = torch.cat([batch_index, p2_roi])
+                    p3_roi_with_batch_index = torch.cat([batch_index, p3_roi])
+
+                    # relevant_feature_map = p3_feature_map.unsqueeze(0)[:, :, y_min:y_max, x_min:x_max]
+                    p2_resized_object = roi_align(p2_feature_map.unsqueeze(0), p2_roi_with_batch_index.unsqueeze(0).to(device), output_size=(24, 30))
+                    p3_resized_object = roi_align(p3_feature_map.unsqueeze(0), p3_roi_with_batch_index.unsqueeze(0).to(device), output_size=(24, 30))
+                    concat_box = torch.cat([p2_resized_object,p3_resized_object],dim=1)
+
+                    
+                    pooled_feature_map_batch.append(concat_box)
+                cell_attribute_loss= cell_training(cell_attribute_model,pooled_feature_map_batch, targets[:,6:13].to(device))
+                    # del concatenated_features
+                cell_attribute_loss.backward(retain_graph=True)
+                optimizer_cell_model.step()
+                
+                avg_attribute_loss+=cell_attribute_loss.item()
+                length_of_data+=1
+   
+
+                loss, loss_items = compute_loss(pred, targets[:,0:6].to(device))  # loss scaled by batch_size I changed here
+                if RANK != -1:
+                    loss *= WORLD_SIZE  # gradient averaged between devices in DDP mode
+                if opt.quad:
+                    loss *= 4.
+
+            # Backward
+            scaler.scale(loss).backward()
+
+            # Optimize - https://pytorch.org/docs/master/notes/amp_examples.html
+            if ni - last_opt_step >= accumulate:
+                scaler.unscale_(optimizer)  # unscale gradients
+                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=10.0)  # clip gradients
+                scaler.step(optimizer)  # optimizer.step
+                scaler.update()
+                optimizer.zero_grad()
+                if ema:
+                    ema.update(model)
+                last_opt_step = ni
+
+            # Log
+            if RANK in {-1, 0}:
+                mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
+                mem = f'{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G'  # (GB)
+                avg_attr_loss = avg_attribute_loss / length_of_data  # Calculate the average attribute loss
+
+                pbar.set_description(('%11s' * 2 + '%11.4g' * 6) %
+                                     (f'{epoch}/{epochs - 1}', mem, *mloss,avg_attr_loss, targets.shape[0], imgs.shape[-1]))
+                callbacks.run('on_train_batch_end', model, ni, imgs, targets, paths, list(mloss))
+                if callbacks.stop_training:
+                    return
+            # end batch ------------------------------------------------------------------------------------------------
+        # print("Attribute_average_loss=   ",avg_attribute_loss/length_of_data)
+        # Scheduler
+        lr = [x['lr'] for x in optimizer.param_groups]  # for loggers
+        scheduler.step()
+        scheduler_cell_model.step()
+
+
+        if RANK in {-1, 0}:
+        #   if epoch > 50:
+            # mAP
+            callbacks.run('on_train_epoch_end', epoch=epoch)
+            ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'names', 'stride', 'class_weights'])
+            final_epoch = (epoch + 1 == epochs) or stopper.possible_stop
+            if not noval or final_epoch:  # Calculate mAP
+                results, maps, _ = validate.run(data_dict,cell_attribute_model,
+                                                batch_size=1,# batch_size // WORLD_SIZE * 2,
+                                                imgsz=imgsz,
+                                                half=amp,
+                                                model=ema.ema,
+                                                single_cls=single_cls,
+                                                dataloader=val_loader,
+                                                save_dir=save_dir,
+                                                plots=False,
+                                                callbacks=callbacks,
+                                                compute_loss=compute_loss
+                                                )
+
+            # Update best mAP
+            fi = fitness(np.array(results).reshape(1, -1))  # weighted combination of [P, R, mAP@.5, mAP@.5-.95]
+            stop = stopper(epoch=epoch, fitness=fi)  # early stop check
+            if fi > best_fitness:
+                best_fitness = fi
+            log_vals = list(mloss) + list(results) + lr
+            callbacks.run('on_fit_epoch_end', log_vals, epoch, best_fitness, fi)
+
+            # Save model
+            if (not nosave) or (final_epoch and not evolve):  # if save
+                ckpt = {
+                    'epoch': epoch,
+                    'best_fitness': best_fitness,
+                    'model': deepcopy(de_parallel(model)).half(),
+                    'ema': deepcopy(ema.ema).half(),
+                    'updates': ema.updates,
+                    'optimizer': optimizer.state_dict(),
+                    'opt': vars(opt),
+                    'git': GIT_INFO,  # {remote, branch, commit} if a git repo
+                    'date': datetime.now().isoformat()}
+
+                # Save last, best and delete
+                torch.save(ckpt, last)
+                if best_fitness == fi:
+                    torch.save(ckpt, best)
+                if opt.save_period > 0 and epoch % opt.save_period == 0:
+                    torch.save(ckpt, w / f'epoch{epoch}.pt')
+                del ckpt
+                callbacks.run('on_model_save', last, epoch, final_epoch, best_fitness, fi)
+
+        # EarlyStopping
+        if RANK != -1:  # if DDP training
+            broadcast_list = [stop if RANK == 0 else None]
+            dist.broadcast_object_list(broadcast_list, 0)  # broadcast 'stop' to all ranks
+            if RANK != 0:
+                stop = broadcast_list[0]
+        if stop:
+            break  # must break all DDP ranks
+
+        # end epoch ----------------------------------------------------------------------------------------------------
+    # end training -----------------------------------------------------------------------------------------------------
+    if RANK in {-1, 0}:
+        LOGGER.info(f'\n{epoch - start_epoch + 1} epochs completed in {(time.time() - t0) / 3600:.3f} hours.')
+        for f in last, best:
+            if f.exists():
+                strip_optimizer(f)  # strip optimizers
+                if f is best:
+                    LOGGER.info(f'\nValidating {f}...')
+                    results, _, _ = validate.run(
+                        data_dict, cell_attribute_model,
+                        batch_size=batch_size // WORLD_SIZE * 2,
+                        imgsz=imgsz,
+                        model=attempt_load(f, device).half(),
+                        iou_thres=0.65 if is_coco else 0.60,  # best pycocotools at iou 0.65
+                        single_cls=single_cls,
+                        dataloader=val_loader,
+                        save_dir=save_dir,
+                        save_json=is_coco,
+                        verbose=True,
+                        plots=plots,
+                        callbacks=callbacks,
+                        compute_loss=compute_loss)  # val best model with plots
+                    if is_coco:
+                        callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch, best_fitness, fi)
+
+        callbacks.run('on_train_end', last, best, epoch, results)
+
+    torch.cuda.empty_cache()
+    return results
+
+
+def parse_opt(known=False):
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='initial weights path')
+    parser.add_argument('--cfg', type=str, default='', help='model.yaml path')
+    parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path')
+    parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-low.yaml', help='hyperparameters path')
+    parser.add_argument('--epochs', type=int, default=100, help='total training epochs')
+    parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs, -1 for autobatch')
+    parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)')
+    parser.add_argument('--rect', action='store_true', help='rectangular training')
+    parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training')
+    parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
+    parser.add_argument('--noval', action='store_true', help='only validate final epoch')
+    parser.add_argument('--noautoanchor', action='store_true', help='disable AutoAnchor')
+    parser.add_argument('--noplots', action='store_true', help='save no plot files')
+    parser.add_argument('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations')
+    parser.add_argument('--evolve_population',
+                        type=str,
+                        default=ROOT / 'data/hyps',
+                        help='location for loading population')
+    parser.add_argument('--resume_evolve', type=str, default=None, help='resume evolve from last generation')
+    parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
+    parser.add_argument('--cache', type=str, nargs='?', const='ram', help='image --cache ram/disk')
+    parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training')
+    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
+    parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
+    parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class')
+    parser.add_argument('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW'], default='SGD', help='optimizer')
+    parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
+    parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)')
+    parser.add_argument('--project', default=ROOT / 'runs/train', help='save to project/name')
+    parser.add_argument('--name', default='exp', help='save to project/name')
+    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
+    parser.add_argument('--quad', action='store_true', help='quad dataloader')
+    parser.add_argument('--cos-lr', action='store_true', help='cosine LR scheduler')
+    parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon')
+    parser.add_argument('--patience', type=int, default=100, help='EarlyStopping patience (epochs without improvement)')
+    parser.add_argument('--freeze', nargs='+', type=int, default=[0], help='Freeze layers: backbone=10, first3=0 1 2')
+    parser.add_argument('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)')
+    parser.add_argument('--seed', type=int, default=0, help='Global training seed')
+    parser.add_argument('--local_rank', type=int, default=-1, help='Automatic DDP Multi-GPU argument, do not modify')
+
+    # Logger arguments
+    parser.add_argument('--entity', default=None, help='Entity')
+    parser.add_argument('--upload_dataset', nargs='?', const=True, default=False, help='Upload data, "val" option')
+    parser.add_argument('--bbox_interval', type=int, default=-1, help='Set bounding-box image logging interval')
+    parser.add_argument('--artifact_alias', type=str, default='latest', help='Version of dataset artifact to use')
+
+    # NDJSON logging
+    parser.add_argument('--ndjson-console', action='store_true', help='Log ndjson to console')
+    parser.add_argument('--ndjson-file', action='store_true', help='Log ndjson to file')
+
+    return parser.parse_known_args()[0] if known else parser.parse_args()
+
+
+def main(opt, callbacks=Callbacks()):
+    """
+    Run pre-flight checks, resolve run options, set up the device/DDP, then train once or evolve hyperparameters.
+
+    Args:
+        opt (argparse.Namespace): Run options, normally produced by `parse_opt`. When resuming (and not evolving)
+            it is replaced by the options stored with the checkpoint (`opt.yaml`, else the checkpoint's 'opt' entry).
+        callbacks (Callbacks): Forwarded to `train`. NOTE: the default instance is created once, at function
+            definition time. In evolve mode a fresh `Callbacks()` replaces it after every training run.
+
+    Returns:
+        None.
+
+    Evolve mode (`opt.evolve` truthy) runs a genetic algorithm for `opt.evolve` generations over the
+    hyperparameters flagged True in `meta`: every individual is trained, its fitness is `results[2]`
+    (presumably mAP@0.5, matching the third entry of `keys` - confirm against `train`), and the next
+    generation is built by tournament selection, elitism, single-point crossover and clamped mutation.
+    """
+    # Checks
+    if RANK in {-1, 0}:
+        print_args(vars(opt))
+        check_git_status()
+        check_requirements(ROOT / 'requirements.txt')
+
+    # Resume (from specified or most recent last.pt)
+    if opt.resume and not check_comet_resume(opt) and not opt.evolve:
+        last = Path(check_file(opt.resume) if isinstance(opt.resume, str) else get_latest_run())
+        opt_yaml = last.parent.parent / 'opt.yaml'  # train options yaml
+        opt_data = opt.data  # original dataset
+        if opt_yaml.is_file():
+            with open(opt_yaml, errors='ignore') as f:
+                d = yaml.safe_load(f)
+        else:
+            d = torch.load(last, map_location='cpu')['opt']
+        opt = argparse.Namespace(**d)  # replace
+        opt.cfg, opt.weights, opt.resume = '', str(last), True  # reinstate
+        if is_url(opt_data):
+            opt.data = check_file(opt_data)  # avoid HUB resume auth timeout
+    else:
+        opt.data, opt.cfg, opt.hyp, opt.weights, opt.project = \
+            check_file(opt.data), check_yaml(opt.cfg), check_yaml(opt.hyp), str(opt.weights), str(opt.project)  # checks
+        assert len(opt.cfg) or len(opt.weights), 'either --cfg or --weights must be specified'
+        if opt.evolve:
+            if opt.project == str(ROOT / 'runs/train'):  # if default project name, rename to runs/evolve
+                opt.project = str(ROOT / 'runs/evolve')
+            opt.exist_ok, opt.resume = opt.resume, False  # pass resume to exist_ok and disable resume
+        if opt.name == 'cfg':
+            opt.name = Path(opt.cfg).stem  # use model.yaml as name
+        opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))
+
+    # DDP mode
+    device = select_device(opt.device, batch_size=opt.batch_size)
+    if LOCAL_RANK != -1:
+        msg = 'is not compatible with YOLOv5 Multi-GPU DDP training'
+        assert not opt.image_weights, f'--image-weights {msg}'
+        assert not opt.evolve, f'--evolve {msg}'
+        assert opt.batch_size != -1, f'AutoBatch with --batch-size -1 {msg}, please pass a valid --batch-size'
+        assert opt.batch_size % WORLD_SIZE == 0, f'--batch-size {opt.batch_size} must be multiple of WORLD_SIZE'
+        assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command'
+        torch.cuda.set_device(LOCAL_RANK)
+        device = torch.device('cuda', LOCAL_RANK)
+        dist.init_process_group(backend='nccl' if dist.is_nccl_available() else 'gloo',
+                                timeout=timedelta(seconds=10800))
+
+    # Train
+    if not opt.evolve:
+        train(opt.hyp, opt, device, callbacks)
+
+    # Evolve hyperparameters (optional)
+    else:
+        # Hyperparameter evolution metadata: (evolve this hyperparameter True/False, lower_limit, upper_limit)
+        meta = {
+            'lr0': (False, 1e-5, 1e-1),  # initial learning rate (SGD=1E-2, Adam=1E-3)
+            'lrf': (False, 0.01, 1.0),  # final OneCycleLR learning rate (lr0 * lrf)
+            'momentum': (False, 0.6, 0.98),  # SGD momentum/Adam beta1
+            'weight_decay': (False, 0.0, 0.001),  # optimizer weight decay
+            'warmup_epochs': (False, 0.0, 5.0),  # warmup epochs (fractions ok)
+            'warmup_momentum': (False, 0.0, 0.95),  # warmup initial momentum
+            'warmup_bias_lr': (False, 0.0, 0.2),  # warmup initial bias lr
+            'box': (False, 0.02, 0.2),  # box loss gain
+            'cls': (False, 0.2, 4.0),  # cls loss gain
+            'cls_pw': (False, 0.5, 2.0),  # cls BCELoss positive_weight
+            'obj': (False, 0.2, 4.0),  # obj loss gain (scale with pixels)
+            'obj_pw': (False, 0.5, 2.0),  # obj BCELoss positive_weight
+            'iou_t': (False, 0.1, 0.7),  # IoU training threshold
+            'anchor_t': (False, 2.0, 8.0),  # anchor-multiple threshold
+            'anchors': (False, 2.0, 10.0),  # anchors per output grid (0 to ignore)
+            'fl_gamma': (False, 0.0, 2.0),  # focal loss gamma (efficientDet default gamma=1.5)
+            'hsv_h': (True, 0.0, 0.1),  # image HSV-Hue augmentation (fraction)
+            'hsv_s': (True, 0.0, 0.9),  # image HSV-Saturation augmentation (fraction)
+            'hsv_v': (True, 0.0, 0.9),  # image HSV-Value augmentation (fraction)
+            'degrees': (True, 0.0, 45.0),  # image rotation (+/- deg)
+            'translate': (True, 0.0, 0.9),  # image translation (+/- fraction)
+            'scale': (True, 0.0, 0.9),  # image scale (+/- gain)
+            'shear': (True, 0.0, 10.0),  # image shear (+/- deg)
+            'perspective': (True, 0.0, 0.001),  # image perspective (+/- fraction), range 0-0.001
+            'flipud': (True, 0.0, 1.0),  # image flip up-down (probability)
+            'fliplr': (True, 0.0, 1.0),  # image flip left-right (probability)
+            'mosaic': (True, 0.0, 1.0),  # image mosaic (probability)
+            'mixup': (True, 0.0, 1.0),  # image mixup (probability)
+            'copy_paste': (True, 0.0, 1.0)}  # segment copy-paste (probability)
+
+        # GA configs
+        pop_size = 50
+        mutation_rate_min = 0.01
+        mutation_rate_max = 0.5
+        crossover_rate_min = 0.5
+        crossover_rate_max = 1
+        min_elite_size = 2
+        max_elite_size = 5
+        tournament_size_min = 2
+        tournament_size_max = 10
+
+        with open(opt.hyp, errors='ignore') as f:
+            hyp = yaml.safe_load(f)  # load hyps dict
+            if 'anchors' not in hyp:  # anchors commented in hyp.yaml
+                hyp['anchors'] = 3
+        if opt.noautoanchor:
+            del hyp['anchors'], meta['anchors']
+        opt.noval, opt.nosave, save_dir = True, True, Path(opt.save_dir)  # only val/save final epoch
+        evolve_yaml, evolve_csv = save_dir / 'hyp_evolve.yaml', save_dir / 'evolve.csv'
+        if opt.bucket:
+            # download evolve.csv if exists
+            subprocess.run([
+                'gsutil',
+                'cp',
+                f'gs://{opt.bucket}/evolve.csv',
+                str(evolve_csv), ])
+
+        # Keep only the evolvable entries (flag True) in the search space
+        meta = {k: v for k, v in meta.items() if v[0]}
+        # Genes are the hyperparameters present in hyp that also have a search range; filtering by membership
+        # (instead of deleting key by key) tolerates hyp files with missing or extra keys
+        hyp_GA = {k: v for k, v in hyp.items() if k in meta}
+        n_genes = len(hyp_GA)
+
+        # Search-space boundaries, one (lower, upper) pair per gene, in hyp_GA key order
+        lower_limit = np.array([meta[k][1] for k in hyp_GA])
+        upper_limit = np.array([meta[k][2] for k in hyp_GA])
+        gene_ranges = list(zip(lower_limit, upper_limit))
+
+        # Seed individuals loaded from disk; the rest of the population is drawn at random
+        initial_values = []
+
+        # If resuming evolution from a previous checkpoint
+        if opt.resume_evolve is not None:
+            assert os.path.isfile(ROOT / opt.resume_evolve), 'evolve population path is wrong!'
+            with open(ROOT / opt.resume_evolve, errors='ignore') as f:
+                evolve_population = yaml.safe_load(f)
+                for value in evolve_population.values():
+                    value = np.array([value[k] for k in hyp_GA.keys()])
+                    initial_values.append(list(value))
+
+        # If not resuming from a previous checkpoint, generate initial values from .yaml files in opt.evolve_population
+        else:
+            yaml_files = [f for f in os.listdir(opt.evolve_population) if f.endswith('.yaml')]
+            for file_name in yaml_files:
+                with open(os.path.join(opt.evolve_population, file_name)) as yaml_file:
+                    value = yaml.safe_load(yaml_file)
+                    value = np.array([value[k] for k in hyp_GA.keys()])
+                    initial_values.append(list(value))
+
+        # Fill the remaining slots with random individuals, then put the seeds in front (last loaded seed first)
+        n_random = max(pop_size - len(initial_values), 0)
+        population = [generate_individual(gene_ranges, n_genes) for _ in range(n_random)]
+        population = initial_values[::-1] + population
+
+        # Metric names passed to print_mutation alongside each training result
+        keys = ('metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95',
+                'val/box_loss', 'val/obj_loss', 'val/cls_loss')
+
+        # Run the genetic algorithm for a fixed number of generations
+        list_keys = list(hyp_GA.keys())
+        for generation in range(opt.evolve):
+            if generation >= 1:
+                # Checkpoint the population about to be evaluated so evolution can be resumed
+                save_dict = {}
+                for i, individual in enumerate(population):
+                    save_dict[f'gen{generation}number{i}'] = {k: float(g) for k, g in zip(list_keys, individual)}
+
+                with open(save_dir / 'evolve_population.yaml', 'w') as outfile:
+                    yaml.dump(save_dict, outfile, default_flow_style=False)
+
+            # Adaptive elite size
+            elite_size = min_elite_size + int((max_elite_size - min_elite_size) * (generation / opt.evolve))
+            # Evaluate the fitness of each individual in the population
+            fitness_scores = []
+            for individual in population:
+                hyp_GA.update(zip(list_keys, individual))
+                hyp.update(hyp_GA)
+                results = train(hyp.copy(), opt, device, callbacks)
+                callbacks = Callbacks()
+                # Write mutation results
+                print_mutation(keys, results, hyp.copy(), save_dir, opt.bucket)
+                fitness_scores.append(results[2])
+
+            # Select the fittest individuals for reproduction using adaptive tournament selection
+            # (tournament size depends only on the generation, so compute it once)
+            tournament_size = max(max(2, tournament_size_min),
+                                  int(min(tournament_size_max, pop_size) - (generation / (opt.evolve / 10))))
+            selected_indices = []
+            for _ in range(pop_size - elite_size):
+                tournament_indices = random.sample(range(pop_size), tournament_size)
+                # First index holding the highest fitness within the tournament wins
+                selected_indices.append(max(tournament_indices, key=fitness_scores.__getitem__))
+
+            # Add exactly elite_size elite individuals (ranking by index avoids over-selecting on fitness ties)
+            elite_indices = sorted(range(pop_size), key=fitness_scores.__getitem__)[-elite_size:]
+            selected_indices.extend(elite_indices)
+
+            # Adaptive crossover and mutation rates (constant within a generation)
+            crossover_rate = max(crossover_rate_min,
+                                 min(crossover_rate_max, crossover_rate_max - (generation / opt.evolve)))
+            mutation_rate = max(mutation_rate_min,
+                                min(mutation_rate_max, mutation_rate_max - (generation / opt.evolve)))
+
+            # Create the next generation through crossover and mutation
+            next_generation = []
+            for _ in range(pop_size):
+                parent1_index = selected_indices[random.randint(0, pop_size - 1)]
+                parent2_index = selected_indices[random.randint(0, pop_size - 1)]
+                # Single-point crossover needs at least two genes (randint(1, 0) would raise ValueError)
+                if random.uniform(0, 1) < crossover_rate and n_genes > 1:
+                    crossover_point = random.randint(1, n_genes - 1)
+                    child = population[parent1_index][:crossover_point] + population[parent2_index][crossover_point:]
+                else:
+                    # Copy, so the in-place mutation below cannot alter the parent or sibling children
+                    child = list(population[parent1_index])
+                for j in range(n_genes):
+                    if random.uniform(0, 1) < mutation_rate:
+                        child[j] += random.uniform(-0.1, 0.1)
+                        child[j] = min(max(child[j], gene_ranges[j][0]), gene_ranges[j][1])
+                next_generation.append(child)
+            # Remember the population that fitness_scores refers to, then replace it with the new generation
+            evaluated_population, population = population, next_generation
+        # Print the best solution found (fitness_scores belongs to the last evaluated population)
+        best_index = fitness_scores.index(max(fitness_scores))
+        best_individual = evaluated_population[best_index]
+        print('Best solution found:', best_individual)
+        # Plot results
+        plot_evolve(evolve_csv)
+        LOGGER.info(f'Hyperparameter evolution finished {opt.evolve} generations\n'
+                    f"Results saved to {colorstr('bold', save_dir)}\n"
+                    f'Usage example: $ python train.py --hyp {evolve_yaml}')
+
+
+def generate_individual(input_ranges, individual_length):
+    """
+    Build one random individual (a list of gene values) for the genetic algorithm.
+
+    Args:
+        input_ranges: Indexable collection of (lower_bound, upper_bound) pairs, one per gene.
+        individual_length (int): Number of genes to draw; the first `individual_length` ranges are used.
+
+    Returns:
+        list: One value per gene, each drawn with `random.uniform(lower_bound, upper_bound)` in gene order.
+    """
+    # Index lazily so ranges are read (and random draws happen) strictly in gene order
+    bounds = (input_ranges[gene] for gene in range(individual_length))
+    return [random.uniform(low, high) for low, high in bounds]
+
+
+def run(**kwargs):
+    """
+    Programmatic entry point: build the options, apply keyword overrides, and call `main`.
+
+    Usage: import train; train.run(data='coco128.yaml', imgsz=320, weights='yolov5m.pt')
+
+    Args:
+        **kwargs: Option names mapped to values; each one is set as an attribute on the options object
+            returned by `parse_opt(True)`, overriding whatever was parsed.
+
+    Returns:
+        The options object that was handed to `main`.
+    """
+    options = parse_opt(True)
+    for name in kwargs:
+        setattr(options, name, kwargs[name])
+    main(options)
+    return options
+
+
+if __name__ == '__main__':  # script entry point (not executed on import)
+    opt = parse_opt()  # parse the command-line options
+    main(opt)  # run checks, then train or evolve according to the options