--- /dev/null
+++ b/tests/test_runtime/test_inference.py
@@ -0,0 +1,149 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import mmcv
+import numpy as np
+import pytest
+import torch
+import torch.nn as nn
+
+from mmaction.apis import inference_recognizer, init_recognizer
+
+video_config_file = 'configs/recognition/tsn/tsn_r50_video_inference_1x1x3_100e_kinetics400_rgb.py'  # noqa: E501
+frame_config_file = 'configs/recognition/tsn/tsn_r50_inference_1x1x3_100e_kinetics400_rgb.py'  # noqa: E501
+flow_frame_config_file = 'configs/recognition/tsn/tsn_r50_320p_1x1x3_110e_kinetics400_flow.py'  # noqa: E501
+video_path = 'demo/demo.mp4'
+frames_path = 'tests/data/imgs'
+
+
+def test_init_recognizer():
+    with pytest.raises(TypeError):
+        # config must be a filename or Config object
+        init_recognizer(dict(config_file=None))
+
+    if torch.cuda.is_available():
+        device = 'cuda:0'
+    else:
+        device = 'cpu'
+
+    model = init_recognizer(video_config_file, None, device)
+
+    config = mmcv.Config.fromfile(video_config_file)
+    config.model.backbone.pretrained = None
+
+    assert isinstance(model, nn.Module)  # sanity-check the constructed model
+    if torch.cuda.is_available():
+        assert next(model.parameters()).is_cuda is True
+    else:
+        assert next(model.parameters()).is_cuda is False
+    assert model.cfg.model.backbone.pretrained is None
+
+
+def test_video_inference_recognizer():
+    if torch.cuda.is_available():
+        device = 'cuda:0'
+    else:
+        device = 'cpu'
+    model = init_recognizer(video_config_file, None, device)
+
+    with pytest.raises(RuntimeError):
+        # video path doesn't exist
+        inference_recognizer(model, 'missing.mp4')
+
+    for ops in model.cfg.data.test.pipeline:
+        if ops['type'] in ('TenCrop', 'ThreeCrop'):
+            # Use CenterCrop to reduce memory in order to pass CI
+            ops['type'] = 'CenterCrop'
+
+    top5_label = inference_recognizer(model, video_path)
+    scores = [item[1] for item in top5_label]
+    assert len(top5_label) == 5
+    assert scores == sorted(scores, reverse=True)
+
+    _, feat = inference_recognizer(
+        model, video_path, outputs=('backbone', 'cls_head'), as_tensor=False)
+    assert isinstance(feat, dict)
+    assert 'backbone' in feat and 'cls_head' in feat
+    assert isinstance(feat['backbone'], np.ndarray)
+    assert isinstance(feat['cls_head'], np.ndarray)
+    assert feat['backbone'].shape == (25, 2048, 7, 7)
+    assert feat['cls_head'].shape == (1, 400)
+
+    _, feat = inference_recognizer(
+        model,
+        video_path,
+        outputs=('backbone.layer3', 'backbone.layer3.1.conv1'))
+    assert 'backbone.layer3.1.conv1' in feat and 'backbone.layer3' in feat
+    assert isinstance(feat['backbone.layer3.1.conv1'], torch.Tensor)
+    assert isinstance(feat['backbone.layer3'], torch.Tensor)
+    assert feat['backbone.layer3'].size() == (25, 1024, 14, 14)
+    assert feat['backbone.layer3.1.conv1'].size() == (25, 256, 14, 14)
+
+    cfg_file = 'configs/recognition/slowfast/slowfast_r50_video_inference_4x16x1_256e_kinetics400_rgb.py'  # noqa: E501
+    sf_model = init_recognizer(cfg_file, None, device)
+    for ops in sf_model.cfg.data.test.pipeline:
+        # Changes to reduce memory in order to pass CI
+        if ops['type'] in ('TenCrop', 'ThreeCrop'):
+            ops['type'] = 'CenterCrop'
+        if ops['type'] == 'SampleFrames':
+            ops['num_clips'] = 1
+    _, feat = inference_recognizer(
+        sf_model, video_path, outputs=('backbone', 'cls_head'))
+    assert isinstance(feat, dict) and isinstance(feat['backbone'], tuple)
+    assert 'backbone' in feat and 'cls_head' in feat
+    assert len(feat['backbone']) == 2
+    assert isinstance(feat['backbone'][0], torch.Tensor)
+    assert isinstance(feat['backbone'][1], torch.Tensor)
+    assert feat['backbone'][0].size() == (1, 2048, 4, 8, 8)
+    assert feat['backbone'][1].size() == (1, 256, 32, 8, 8)
+    assert feat['cls_head'].size() == (1, 400)
+
+
+def test_frames_inference_recognizer():
+    if torch.cuda.is_available():
+        device = 'cuda:0'
+    else:
+        device = 'cpu'
+    rgb_model = init_recognizer(frame_config_file, None, device)
+    flow_model = init_recognizer(flow_frame_config_file, None, device)
+
+    with pytest.raises(RuntimeError):
+        # video path doesn't exist
+        inference_recognizer(rgb_model, 'missing_path')
+
+    for ops in rgb_model.cfg.data.test.pipeline:
+        if ops['type'] in ('TenCrop', 'ThreeCrop'):
+            # Use CenterCrop to reduce memory in order to pass CI
+            ops['type'] = 'CenterCrop'
+            ops['crop_size'] = 224
+    for ops in flow_model.cfg.data.test.pipeline:
+        if ops['type'] in ('TenCrop', 'ThreeCrop'):
+            # Use CenterCrop to reduce memory in order to pass CI
+            ops['type'] = 'CenterCrop'
+            ops['crop_size'] = 224
+
+    top5_label = inference_recognizer(rgb_model, frames_path)
+    scores = [item[1] for item in top5_label]
+    assert len(top5_label) == 5
+    assert scores == sorted(scores, reverse=True)
+
+    _, feat = inference_recognizer(
+        flow_model,
+        frames_path,
+        outputs=('backbone', 'cls_head'),
+        as_tensor=False)
+    assert isinstance(feat, dict)
+    assert 'backbone' in feat and 'cls_head' in feat
+    assert isinstance(feat['backbone'], np.ndarray)
+    assert isinstance(feat['cls_head'], np.ndarray)
+    assert feat['backbone'].shape == (25, 2048, 7, 7)
+    assert feat['cls_head'].shape == (1, 400)
+
+    _, feat = inference_recognizer(
+        rgb_model,
+        frames_path,
+        outputs=('backbone.layer3', 'backbone.layer3.1.conv1'))
+
+    assert 'backbone.layer3.1.conv1' in feat and 'backbone.layer3' in feat
+    assert isinstance(feat['backbone.layer3.1.conv1'], torch.Tensor)
+    assert isinstance(feat['backbone.layer3'], torch.Tensor)
+    assert feat['backbone.layer3'].size() == (25, 1024, 14, 14)
+    assert feat['backbone.layer3.1.conv1'].size() == (25, 256, 14, 14)