--- a +++ b/configs/recognition/slowonly/config_test.py @@ -0,0 +1,93 @@ +_base_ = [ + '../../_base_/models/slowonly_r50.py', + '../../_base_/schedules/sgd_150e_warmup.py', + '../../_base_/default_runtime.py' +] + +# model settings +model = dict(cls_head=dict(num_classes=101)) + +# dataset settings +dataset_type = 'RawframeDataset' +data_root = '../mmaction/data/ucf101/rawframes/' +data_root_val = '../mmaction/data/ucf101/rawframes/' +split = 1 # official train/test splits. valid numbers: 1, 2, 3 +ann_file_train = f'../mmaction/data/ucf101/ucf101_train_split_{split}_rawframes.txt' +ann_file_val = f'../mmaction/data/ucf101/ucf101_val_split_{split}_rawframes.txt' +ann_file_test = f'../mmaction/data/ucf101/ucf101_val_split_{split}_rawframes.txt' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False) + +train_pipeline = [ + dict(type='SampleFrames', clip_len=8, frame_interval=4, num_clips=1), + dict(type='RawFrameDecode'), + dict(type='Resize', scale=(-1, 256)), + dict(type='RandomResizedCrop'), + dict(type='Resize', scale=(224, 224), keep_ratio=False), + dict(type='Flip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='FormatShape', input_format='NCTHW'), + dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), + dict(type='ToTensor', keys=['imgs', 'label']) +] +val_pipeline = [ + dict( + type='SampleFrames', + clip_len=8, + frame_interval=4, + num_clips=1, + test_mode=True), + dict(type='RawFrameDecode'), + dict(type='Resize', scale=(-1, 256)), + dict(type='CenterCrop', crop_size=224), + dict(type='Normalize', **img_norm_cfg), + dict(type='FormatShape', input_format='NCTHW'), + dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), + dict(type='ToTensor', keys=['imgs']) +] +test_pipeline = [ + dict( + type='SampleFrames', + clip_len=8, + frame_interval=4, + num_clips=10, + test_mode=True), + dict(type='RawFrameDecode'), + dict(type='Resize', scale=(-1, 256)), + dict(type='ThreeCrop', crop_size=256), + dict(type='Normalize', **img_norm_cfg), + dict(type='FormatShape', input_format='NCTHW'), + dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), + dict(type='ToTensor', keys=['imgs']) +] + +data = dict( + videos_per_gpu=8, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), + train=dict( + type=dataset_type, + ann_file=ann_file_train, + data_prefix=data_root, + pipeline=train_pipeline), + val=dict( + type=dataset_type, + ann_file=ann_file_val, + data_prefix=data_root_val, + pipeline=val_pipeline), + test=dict( + type=dataset_type, + ann_file=ann_file_test, + data_prefix=data_root_val, + pipeline=test_pipeline)) +evaluation = dict( + interval=1, metrics=['top_k_accuracy', 'mean_class_accuracy']) + +# optimizer +optimizer = dict(lr=0.1) # this lr is used for 8 gpus +# learning policy +lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) +total_epochs = 64 + +# runtime settings +work_dir = './work_dirs/slowonly_r50_8x4x1_64e_ucf101_rgb'