# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp

import pytest
import torch
import torch.nn as nn
from mmcv.utils import assert_params_all_zeros

from mmaction.models.common import (LFB, TAM, Conv2plus1d, ConvAudio,
                                    DividedSpatialAttentionWithNorm,
                                    DividedTemporalAttentionWithNorm,
                                    FFNWithNorm)


def test_conv2plus1d():
    """test conv2plus1d."""
    with pytest.raises(AssertionError):
        # the lengths of kernel size, stride and padding must be the same
        Conv2plus1d(3, 8, (2, 2))

    conv_2plus1d = Conv2plus1d(3, 8, 2)
    conv_2plus1d.init_weights()
    # init_weights sets the intermediate BN scale to one and shift to zero
    assert torch.equal(conv_2plus1d.bn_s.weight,
                       torch.ones_like(conv_2plus1d.bn_s.weight))
    assert torch.equal(conv_2plus1d.bn_s.bias,
                       torch.zeros_like(conv_2plus1d.bn_s.bias))

    x = torch.rand(1, 3, 8, 256, 256)
    output = conv_2plus1d(x)
    assert output.shape == torch.Size([1, 8, 7, 255, 255])
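    # Shape sketch for the assertion above: with kernel_size 2, stride 1 and
    # no padding, each of the T/H/W dims shrinks by kernel_size - 1, i.e.
    # out = in - 2 + 1, so (8, 256, 256) -> (7, 255, 255), assuming the
    # factorized (1, k, k) spatial and (k, 1, 1) temporal convs share the
    # given kernel size and stride.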


def test_conv_audio():
    """test conv_audio."""
    conv_audio = ConvAudio(3, 8, 3)
    conv_audio.init_weights()

    x = torch.rand(1, 3, 8, 8)
    # the default op is 'concat'
    output = conv_audio(x)
    assert output.shape == torch.Size([1, 16, 8, 8])

    conv_audio_sum = ConvAudio(3, 8, 3, op='sum')
    output = conv_audio_sum(x)
    assert output.shape == torch.Size([1, 8, 8, 8])
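    # Channel sketch for the two ops above: with op='concat' the outputs of
    # the two factorized branches are presumably stacked along the channel
    # dim (8 + 8 = 16), while op='sum' adds them elementwise and keeps 8.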


def test_divided_temporal_attention_with_norm():
    """test DividedTemporalAttentionWithNorm."""
    _cfg = dict(embed_dims=768, num_heads=12, num_frames=8)
    divided_temporal_attention = DividedTemporalAttentionWithNorm(**_cfg)
    assert isinstance(divided_temporal_attention.norm, nn.LayerNorm)
    # temporal_fc should be zero-initialized
    assert assert_params_all_zeros(divided_temporal_attention.temporal_fc)

    x = torch.rand(1, 1 + 8 * 14 * 14, 768)
    output = divided_temporal_attention(x)
    assert output.shape == torch.Size([1, 1 + 8 * 14 * 14, 768])
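    # Token-count sketch: the sequence length 1 + 8 * 14 * 14 corresponds to
    # one cls token plus num_frames * 14 * 14 patch tokens, assuming a
    # TimeSformer-style 14 x 14 spatial patch grid.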


def test_divided_spatial_attention_with_norm():
    """test DividedSpatialAttentionWithNorm."""
    _cfg = dict(embed_dims=512, num_heads=8, num_frames=4, dropout_layer=None)
    divided_spatial_attention = DividedSpatialAttentionWithNorm(**_cfg)
    # dropout_layer=None should fall back to an identity mapping
    assert isinstance(divided_spatial_attention.dropout_layer, nn.Identity)
    assert isinstance(divided_spatial_attention.norm, nn.LayerNorm)

    x = torch.rand(1, 1 + 4 * 14 * 14, 512)
    output = divided_spatial_attention(x)
    assert output.shape == torch.Size([1, 1 + 4 * 14 * 14, 512])


def test_ffn_with_norm():
    """test FFNWithNorm."""
    _cfg = dict(
        embed_dims=256, feedforward_channels=256 * 2, norm_cfg=dict(type='LN'))
    ffn_with_norm = FFNWithNorm(**_cfg)
    assert isinstance(ffn_with_norm.norm, nn.LayerNorm)

    x = torch.rand(1, 1 + 4 * 14 * 14, 256)
    output = ffn_with_norm(x)
    assert output.shape == torch.Size([1, 1 + 4 * 14 * 14, 256])
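    # The FFN keeps the token layout intact: feedforward_channels only widens
    # the hidden layer (256 -> 512 -> 256), assuming the standard two-layer
    # FFN, so input and output shapes match.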


def test_TAM():
    """test TAM."""
    with pytest.raises(AssertionError):
        # alpha must be a positive integer
        TAM(16, 8, alpha=0, beta=4)

    with pytest.raises(AssertionError):
        # beta must be a positive integer
        TAM(16, 8, alpha=2, beta=0)

    with pytest.raises(AssertionError):
        # the number of channels of x should equal the in_channels of TAM
        tam = TAM(16, 8)
        x = torch.rand(64, 8, 112, 112)
        tam(x)

    tam = TAM(16, 8)
    x = torch.rand(32, 16, 112, 112)
    output = tam(x)
    assert output.shape == torch.Size([32, 16, 112, 112])
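    # Batch-layout sketch: with num_segments=8 the leading dim must be a
    # multiple of 8 (here 32 = 4 clips x 8 segments), assuming TAM reshapes
    # its input to (batches, num_segments, C, H, W) to build the adaptive
    # temporal kernel.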


def test_LFB():
    """test LFB."""
    with pytest.raises(ValueError):
        # a non-existent prefix path is not allowed
        LFB(lfb_prefix_path='./_non_exist_path')

    lfb_prefix_path = osp.normpath(
        osp.join(osp.dirname(__file__), '../data/lfb'))

    with pytest.raises(AssertionError):
        # dataset_modes must be a str or a tuple of str
        LFB(lfb_prefix_path=lfb_prefix_path, dataset_modes=100)

    with pytest.raises(ValueError):
        # 'ceph' is not a supported device
        LFB(lfb_prefix_path=lfb_prefix_path, device='ceph')

    # load on cpu
    lfb_cpu = LFB(
        lfb_prefix_path=lfb_prefix_path,
        max_num_sampled_feat=5,
        window_size=60,
        lfb_channels=16,
        dataset_modes='unittest',
        device='cpu')
    lt_feat_cpu = lfb_cpu['video_1,930']
    assert lt_feat_cpu.shape == (5 * 60, 16)
    assert len(lfb_cpu) == 1

    # load via lmdb
    lfb_lmdb = LFB(
        lfb_prefix_path=lfb_prefix_path,
        max_num_sampled_feat=3,
        window_size=30,
        lfb_channels=16,
        dataset_modes='unittest',
        device='lmdb',
        lmdb_map_size=1e6)
    lt_feat_lmdb = lfb_lmdb['video_1,930']
    assert lt_feat_lmdb.shape == (3 * 30, 16)
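    # Window sketch for the lookups above: each key follows the
    # 'video_id,timestamp' format of the sample data, and a lookup returns
    # (max_num_sampled_feat * window_size, lfb_channels) features, i.e.
    # (5 * 60, 16) for the cpu bank and (3 * 30, 16) for the lmdb bank.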