# Copyright (c) OpenMMLab. All rights reserved.
"""Benchmark the pure inference speed (videos/s) of an MMAction2 recognizer."""
import argparse
import time

import torch
from mmcv import Config
from mmcv.cnn import fuse_conv_bn
from mmcv.parallel import MMDataParallel
from mmcv.runner.fp16_utils import wrap_fp16_model

from mmaction.datasets import build_dataloader, build_dataset
from mmaction.models import build_model


def parse_args():
    """Parse command-line arguments for the benchmark script.

    Returns:
        argparse.Namespace: parsed arguments with attributes ``config``,
        ``log_interval`` (int) and ``fuse_conv_bn`` (bool).
    """
    parser = argparse.ArgumentParser(
        description='MMAction2 benchmark a recognizer')
    parser.add_argument('config', help='test config file path')
    parser.add_argument(
        '--log-interval',
        # FIX: without type=int, a user-supplied value arrives as a str and
        # breaks the `(i + 1) % args.log_interval` check in main().
        type=int,
        default=10,
        help='interval of logging')
    parser.add_argument(
        '--fuse-conv-bn',
        action='store_true',
        help='Whether to fuse conv and bn, this will slightly increase'
        'the inference speed')
    args = parser.parse_args()
    return args


def main():
    """Run inference over the test dataloader and report the average fps.

    Builds the test dataset/dataloader and model from the config, runs
    forward passes on GPU 0 (optionally fp16 / conv-bn fused), skips the
    first few warm-up iterations and prints a running and an overall
    videos-per-second figure.
    """
    args = parse_args()

    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark (speeds up fixed-shape convolutions)
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    # no pretrained weights needed for a speed benchmark
    cfg.model.backbone.pretrained = None
    cfg.data.test.test_mode = True

    # build the dataloader
    dataset = build_dataset(cfg.data.test, dict(test_mode=True))
    data_loader = build_dataloader(
        dataset,
        videos_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        persistent_workers=cfg.data.get('persistent_workers', False),
        dist=False,
        shuffle=False)

    # build the model and load checkpoint
    model = build_model(
        cfg.model, train_cfg=None, test_cfg=cfg.get('test_cfg'))
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    if args.fuse_conv_bn:
        model = fuse_conv_bn(model)

    model = MMDataParallel(model, device_ids=[0])

    model.eval()

    # the first several iterations may be very slow so skip them
    num_warmup = 5
    pure_inf_time = 0
    # Total number of timed videos. FIX: the original comment/log said
    # "2000" while the loop actually breaks at 200 — one constant keeps
    # the printed progress consistent with the real stop condition.
    num_videos = 200

    for i, data in enumerate(data_loader):

        # synchronize so the timer measures the actual GPU work, not just
        # the asynchronous kernel launch
        torch.cuda.synchronize()
        start_time = time.perf_counter()

        with torch.no_grad():
            model(return_loss=False, **data)

        torch.cuda.synchronize()
        elapsed = time.perf_counter() - start_time

        if i >= num_warmup:
            pure_inf_time += elapsed
            if (i + 1) % args.log_interval == 0:
                fps = (i + 1 - num_warmup) / pure_inf_time
                print(f'Done video [{i + 1:<3}/{num_videos}], '
                      f'fps: {fps:.1f} video / s')

        if (i + 1) == num_videos:
            # FIX: `elapsed` was already accumulated in the branch above;
            # the original added it a second time here, double-counting the
            # last iteration and biasing the overall fps downward.
            fps = (i + 1 - num_warmup) / pure_inf_time
            print(f'Overall fps: {fps:.1f} video / s')
            break


if __name__ == '__main__':
    main()