--- /dev/null
+++ b/tools/misc/clip_feature_extraction.py
@@ -0,0 +1,229 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import argparse
+import os
+import os.path as osp
+import warnings
+from datetime import datetime
+
+import mmcv
+import numpy as np
+import torch
+import torch.distributed as dist
+from mmcv import Config, DictAction
+from mmcv.cnn import fuse_conv_bn
+from mmcv.fileio.io import file_handlers
+from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
+from mmcv.runner import get_dist_info, init_dist, load_checkpoint
+from mmcv.runner.fp16_utils import wrap_fp16_model
+
+from mmaction.apis import multi_gpu_test, single_gpu_test
+from mmaction.datasets import build_dataloader, build_dataset
+from mmaction.models import build_model
+from mmaction.utils import register_module_hooks
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description='MMAction2 clip-level feature extraction')
+    parser.add_argument('config', help='test config file path')
+    parser.add_argument('checkpoint', help='checkpoint file')
+    parser.add_argument('--video-list', help='video file list')
+    parser.add_argument('--video-root', help='video root directory')
+    parser.add_argument(
+        '--out',
+        default=None,
+        help='output result file in pkl/yaml/json format')
+    parser.add_argument(
+        '--fuse-conv-bn',
+        action='store_true',
+        help='Whether to fuse conv and bn; this will slightly increase '
+        'the inference speed')
+    parser.add_argument(
+        '--gpu-collect',
+        action='store_true',
+        help='whether to use gpu to collect results')
+    parser.add_argument(
+        '--tmpdir',
+        help='tmp directory used for collecting results from multiple '
+        'workers, available when gpu-collect is not specified')
+    parser.add_argument(
+        '--cfg-options',
+        nargs='+',
+        action=DictAction,
+        default={},
+        help='override some settings in the used config, the key-value pair '
+        'in xxx=yyy format will be merged into the config file. For example, '
+        "'--cfg-options model.backbone.depth=18 model.backbone.with_cp=True'")
+    parser.add_argument(
+        '--launcher',
+        choices=['none', 'pytorch', 'slurm', 'mpi'],
+        default='none',
+        help='job launcher')
+    parser.add_argument('--local_rank', type=int, default=0)
+    args = parser.parse_args()
+    if 'LOCAL_RANK' not in os.environ:
+        os.environ['LOCAL_RANK'] = str(args.local_rank)
+
+    return args
+
+
+def turn_off_pretrained(cfg):
+    # recursively find all pretrained keys in the model config,
+    # and set them to None to avoid redundant pretrain steps for testing
+    if 'pretrained' in cfg:
+        cfg.pretrained = None
+
+    # recursively turn off pretrained value
+    for sub_cfg in cfg.values():
+        if isinstance(sub_cfg, dict):
+            turn_off_pretrained(sub_cfg)
+
+
+def text2tensor(text, size=256):
+    nums = [ord(x) for x in text]
+    assert len(nums) < size
+    nums.extend([0] * (size - len(nums)))
+    nums = np.array(nums, dtype=np.uint8)
+    return torch.from_numpy(nums)
+
+
+def tensor2text(tensor):
+    # 0 is the padding value and does not occur in the encoded text
+    chars = [chr(x) for x in tensor if x != 0]
+    return ''.join(chars)
+
+
+def inference_pytorch(args, cfg, distributed, data_loader):
+    """Get predictions by pytorch models."""
+    # remove redundant pretrain steps for testing
+    turn_off_pretrained(cfg.model)
+
+    # build the model and load checkpoint
+    model = build_model(
+        cfg.model, train_cfg=None, test_cfg=cfg.get('test_cfg'))
+
+    if len(cfg.module_hooks) > 0:
+        register_module_hooks(model, cfg.module_hooks)
+
+    fp16_cfg = cfg.get('fp16', None)
+    if fp16_cfg is not None:
+        wrap_fp16_model(model)
+    load_checkpoint(model, args.checkpoint, map_location='cpu')
+
+    if args.fuse_conv_bn:
+        model = fuse_conv_bn(model)
+
+    if not distributed:
+        model = MMDataParallel(model, device_ids=[0])
+        outputs = single_gpu_test(model, data_loader)
+    else:
+        model = MMDistributedDataParallel(
+            model.cuda(),
+            device_ids=[torch.cuda.current_device()],
+            broadcast_buffers=False)
+        outputs = multi_gpu_test(model, data_loader, args.tmpdir,
+                                 args.gpu_collect)
+
+    return outputs
+
+
+def main():
+    args = parse_args()
+
+    cfg = Config.fromfile(args.config)
+
+    cfg.merge_from_dict(args.cfg_options)
+
+    if cfg.model.get('test_cfg') is None:
+        cfg.model['test_cfg'] = dict(feature_extraction=True)
+    else:
+        cfg.model['test_cfg']['feature_extraction'] = True
+
+    # Load output_config from cfg
+    output_config = cfg.get('output_config', {})
+    if args.out:
+        # Overwrite output_config from args.out
+        output_config = Config._merge_a_into_b(
+            dict(out=args.out), output_config)
+
+    assert output_config, 'Please specify output filename with --out.'
+
+    dataset_type = cfg.data.test.type
+    if output_config.get('out', None):
+        if 'output_format' in output_config:
+            # ugly workaround to make recognition and localization the same
+            warnings.warn(
+                'Skip checking `output_format` in localization task.')
+        else:
+            out = output_config['out']
+            # make sure the dirname of the output path exists
+            mmcv.mkdir_or_exist(osp.dirname(out))
+            _, suffix = osp.splitext(out)
+            assert dataset_type == 'VideoDataset'
+
+            assert suffix[1:] in file_handlers, (
+                'The format of the output '
+                'file should be json, pickle or yaml')
+
+    # set cudnn benchmark
+    if cfg.get('cudnn_benchmark', False):
+        torch.backends.cudnn.benchmark = True
+    cfg.data.test.test_mode = True
+    cfg.data.test.data_prefix = args.video_root
+
+    # init distributed env first, since logger depends on the dist info.
+    if args.launcher == 'none':
+        distributed = False
+    else:
+        distributed = True
+        init_dist(args.launcher, **cfg.dist_params)
+
+    rank, _ = get_dist_info()
+
+    size = 256
+    fname_tensor = torch.zeros(size, dtype=torch.uint8).cuda()
+    if rank == 0:
+        videos = open(args.video_list).readlines()
+        videos = [x.strip() for x in videos]
+
+        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
+        fake_anno = f'fake_anno_{timestamp}.txt'
+        with open(fake_anno, 'w') as fout:
+            lines = [x + ' 0' for x in videos]
+            fout.write('\n'.join(lines))
+        fname_tensor = text2tensor(fake_anno, size).cuda()
+
+    if distributed:
+        dist.broadcast(fname_tensor, src=0)
+
+    fname = tensor2text(fname_tensor)
+    cfg.data.test.ann_file = fname
+
+    # The flag is used to register module hooks
+    cfg.setdefault('module_hooks', [])
+
+    # build the dataloader
+    dataset = build_dataset(cfg.data.test, dict(test_mode=True))
+    dataloader_setting = dict(
+        videos_per_gpu=cfg.data.get('videos_per_gpu', 1),
+        workers_per_gpu=cfg.data.get('workers_per_gpu', 1),
+        dist=distributed,
+        shuffle=False)
+
+    dataloader_setting = dict(dataloader_setting,
+                              **cfg.data.get('test_dataloader', {}))
+    data_loader = build_dataloader(dataset, **dataloader_setting)
+
+    outputs = inference_pytorch(args, cfg, distributed, data_loader)
+
+    if rank == 0:
+        if output_config.get('out', None):
+            out = output_config['out']
+            print(f'\nwriting results to {out}')
+            dataset.dump_results(outputs, **output_config)
+        # remove the temporary file
+        os.remove(fake_anno)
+
+
+if __name__ == '__main__':
+    main()
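
A note on the string-broadcast trick in main(): with multiple workers, only rank 0 writes the timestamped fake annotation file, so its name must be shared with the other ranks, and torch.distributed can only broadcast tensors. The patch therefore encodes the filename into a fixed-size uint8 tensor, broadcasts it, and decodes it on every rank. Below is a minimal, standalone sketch of that round trip (CPU only, no distributed setup), mirroring the text2tensor/tensor2text helpers from the patch:

import numpy as np
import torch


def text2tensor(text, size=256):
    # encode each character as one byte, then pad with zeros up to `size`
    nums = [ord(x) for x in text]
    assert len(nums) < size  # keep at least one zero byte as terminator
    nums.extend([0] * (size - len(nums)))
    return torch.from_numpy(np.array(nums, dtype=np.uint8))


def tensor2text(tensor):
    # drop the zero padding and turn the remaining bytes back into characters
    return ''.join(chr(int(x)) for x in tensor if x != 0)


fname = 'fake_anno_20220101_000000.txt'  # example ASCII filename
encoded = text2tensor(fname)             # what rank 0 would broadcast
assert tensor2text(encoded) == fname     # what every rank would decode

Note this only works for ASCII filenames, since each character must fit in one uint8 byte; the names generated by the script always satisfy this.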
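For reference, a typical single-GPU invocation might look like the following (the config, checkpoint, and data paths are placeholders):

python tools/misc/clip_feature_extraction.py \
    CONFIG_FILE CHECKPOINT_FILE \
    --video-list video_list.txt \
    --video-root data/videos \
    --out features.pkl

Each line of --video-list is a video path relative to --video-root; the script appends a fake label 0 to every entry so the list can be parsed as a standard VideoDataset annotation file, and the temporary annotation file is deleted after extraction. For multi-GPU extraction, the tool would be launched through a distributed launcher with --launcher pytorch, in the same way as the other MMAction2 test scripts.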