--- a +++ b/mmaction/datasets/video_dataset.py @@ -0,0 +1,61 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp + +from .base import BaseDataset +from .builder import DATASETS + + +@DATASETS.register_module() +class VideoDataset(BaseDataset): + """Video dataset for action recognition. + + The dataset loads raw videos and apply specified transforms to return a + dict containing the frame tensors and other information. + + The ann_file is a text file with multiple lines, and each line indicates + a sample video with the filepath and label, which are split with a + whitespace. Example of a annotation file: + + .. code-block:: txt + + some/path/000.mp4 1 + some/path/001.mp4 1 + some/path/002.mp4 2 + some/path/003.mp4 2 + some/path/004.mp4 3 + some/path/005.mp4 3 + + + Args: + ann_file (str): Path to the annotation file. + pipeline (list[dict | callable]): A sequence of data transforms. + start_index (int): Specify a start index for frames in consideration of + different filename format. However, when taking videos as input, + it should be set to 0, since frames loaded from videos count + from 0. Default: 0. + **kwargs: Keyword arguments for ``BaseDataset``. + """ + + def __init__(self, ann_file, pipeline, start_index=0, **kwargs): + super().__init__(ann_file, pipeline, start_index=start_index, **kwargs) + + def load_annotations(self): + """Load annotation file to get video information.""" + if self.ann_file.endswith('.json'): + return self.load_json_annotations() + + video_infos = [] + with open(self.ann_file, 'r') as fin: + for line in fin: + line_split = line.strip().split() + if self.multi_class: + assert self.num_classes is not None + filename, label = line_split[0], line_split[1:] + label = list(map(int, label)) + else: + filename, label = line_split + label = int(label) + if self.data_prefix is not None: + filename = osp.join(self.data_prefix, filename) + video_infos.append(dict(filename=filename, label=label)) + return video_infos