--- /dev/null
+++ b/tests/test_models/test_common.py
@@ -0,0 +1,155 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+
+import pytest
+import torch
+import torch.nn as nn
+
+from mmaction.models.common import (LFB, TAM, Conv2plus1d, ConvAudio,
+                                    DividedSpatialAttentionWithNorm,
+                                    DividedTemporalAttentionWithNorm,
+                                    FFNWithNorm)
+
+
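+def assert_params_all_zeros(module):
+    """Check that all parameters of ``module`` are zero (local helper;
+    assumed here since mmcv.utils does not provide this function)."""
+    return all(
+        torch.allclose(param, torch.zeros_like(param))
+        for param in module.parameters())
+
+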
+def test_conv2plus1d():
+    with pytest.raises(AssertionError):
+        # Length of kernel size, stride and padding must be the same
+        Conv2plus1d(3, 8, (2, 2))
+
+    conv_2plus1d = Conv2plus1d(3, 8, 2)
+    conv_2plus1d.init_weights()
+
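+    # init_weights should leave the spatial BN at identity: weight 1, bias 0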
+    assert torch.equal(conv_2plus1d.bn_s.weight,
+                       torch.ones_like(conv_2plus1d.bn_s.weight))
+    assert torch.equal(conv_2plus1d.bn_s.bias,
+                       torch.zeros_like(conv_2plus1d.bn_s.bias))
+
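+    # kernel 2, stride 1, no padding: T, H and W each shrink by 1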
+    x = torch.rand(1, 3, 8, 256, 256)
+    output = conv_2plus1d(x)
+    assert output.shape == torch.Size([1, 8, 7, 255, 255])
+
+
+def test_conv_audio():
+    conv_audio = ConvAudio(3, 8, 3)
+    conv_audio.init_weights()
+
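+    # the default op 'concat' stacks both conv branches, doubling the channels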
+    x = torch.rand(1, 3, 8, 8)
+    output = conv_audio(x)
+    assert output.shape == torch.Size([1, 16, 8, 8])
+
+    conv_audio_sum = ConvAudio(3, 8, 3, op='sum')
+    output = conv_audio_sum(x)
+    assert output.shape == torch.Size([1, 8, 8, 8])
+
+
+def test_divided_temporal_attention_with_norm():
+    _cfg = dict(embed_dims=768, num_heads=12, num_frames=8)
+    divided_temporal_attention = DividedTemporalAttentionWithNorm(**_cfg)
+    assert isinstance(divided_temporal_attention.norm, nn.LayerNorm)
+    assert assert_params_all_zeros(divided_temporal_attention.temporal_fc)
+
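+    # tokens: 1 cls token + num_frames * 14 * 14 patch tokens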
+    x = torch.rand(1, 1 + 8 * 14 * 14, 768)
+    output = divided_temporal_attention(x)
+    assert output.shape == torch.Size([1, 1 + 8 * 14 * 14, 768])
+
+
+def test_divided_spatial_attention_with_norm():
+    _cfg = dict(embed_dims=512, num_heads=8, num_frames=4, dropout_layer=None)
+    divided_spatial_attention = DividedSpatialAttentionWithNorm(**_cfg)
+    assert isinstance(divided_spatial_attention.dropout_layer, nn.Identity)
+    assert isinstance(divided_spatial_attention.norm, nn.LayerNorm)
+
+    x = torch.rand(1, 1 + 4 * 14 * 14, 512)
+    output = divided_spatial_attention(x)
+    assert output.shape == torch.Size([1, 1 + 4 * 14 * 14, 512])
+
+
+def test_ffn_with_norm():
+    _cfg = dict(
+        embed_dims=256, feedforward_channels=256 * 2, norm_cfg=dict(type='LN'))
+    ffn_with_norm = FFNWithNorm(**_cfg)
+    assert isinstance(ffn_with_norm.norm, nn.LayerNorm)
+
+    x = torch.rand(1, 1 + 4 * 14 * 14, 256)
+    output = ffn_with_norm(x)
+    assert output.shape == torch.Size([1, 1 + 4 * 14 * 14, 256])
+
+
+def test_TAM():
+    """test TAM."""
+    with pytest.raises(AssertionError):
+        # alpha must be a positive integer
+        TAM(16, 8, alpha=0, beta=4)
+
+    with pytest.raises(AssertionError):
+        # beta must be a positive integer
+        TAM(16, 8, alpha=2, beta=0)
+
+    tam = TAM(16, 8)
+    with pytest.raises(AssertionError):
+        # the channel number of the input must equal the in_channels of TAM
+        x = torch.rand(64, 8, 112, 112)
+        tam(x)
+
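+    # a valid input is (N * num_segments, C, H, W): batch 4 with 8 segments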
+    x = torch.rand(32, 16, 112, 112)
+    output = tam(x)
+    assert output.shape == torch.Size([32, 16, 112, 112])
+
+
+def test_LFB():
+    """test LFB."""
+    with pytest.raises(ValueError):
+        # a non-existent lfb_prefix_path should raise an error
+        LFB(lfb_prefix_path='./_non_exist_path')
+
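+    # the prebuilt test feature bank lives under tests/data/lfb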
+    lfb_prefix_path = osp.normpath(
+        osp.join(osp.dirname(__file__), '../data/lfb'))
+
+    with pytest.raises(AssertionError):
+        # 100 is not a valid type for dataset_modes
+        LFB(lfb_prefix_path=lfb_prefix_path, dataset_modes=100)
+
+    with pytest.raises(ValueError):
+        # 'ceph' is not a supported device
+        LFB(lfb_prefix_path=lfb_prefix_path, device='ceph')
+
+    # load on cpu
+    lfb_cpu = LFB(
+        lfb_prefix_path=lfb_prefix_path,
+        max_num_sampled_feat=5,
+        window_size=60,
+        lfb_channels=16,
+        dataset_modes=('unittest', ),
+        device='cpu')
+
+    lt_feat_cpu = lfb_cpu['video_1,930']
+    assert lt_feat_cpu.shape == (5 * 60, 16)
+    assert len(lfb_cpu) == 1
+
+    # load via the lmdb backend
+    lfb_lmdb = LFB(
+        lfb_prefix_path=lfb_prefix_path,
+        max_num_sampled_feat=3,
+        window_size=30,
+        lfb_channels=16,
+        dataset_modes=('unittest', ),
+        device='lmdb',
+        lmdb_map_size=1e6)
+    lt_feat_lmdb = lfb_lmdb['video_1,930']
+    assert lt_feat_lmdb.shape == (3 * 30, 16)