# Copyright (c) OpenMMLab. All rights reserved.
import copy
import os.path as osp
import torch
from .base import BaseDataset
from .builder import DATASETS
@DATASETS.register_module()
class RawframeDataset(BaseDataset):
"""Rawframe dataset for action recognition.
The dataset loads raw frames and apply specified transforms to return a
dict containing the frame tensors and other information.
The ann_file is a text file with multiple lines, and each line indicates
the directory to frames of a video, total frames of the video and
the label of a video, which are split with a whitespace.
Example of a annotation file:
.. code-block:: txt
some/directory-1 163 1
some/directory-2 122 1
some/directory-3 258 2
some/directory-4 234 2
some/directory-5 295 3
some/directory-6 121 3
Example of a multi-class annotation file:
.. code-block:: txt
some/directory-1 163 1 3 5
some/directory-2 122 1 2
some/directory-3 258 2
some/directory-4 234 2 4 6 8
some/directory-5 295 3
some/directory-6 121 3
Example of a with_offset annotation file (clips from long videos), each
line indicates the directory to frames of a video, the index of the start
frame, total frames of the video clip and the label of a video clip, which
are split with a whitespace.
.. code-block:: txt
some/directory-1 12 163 3
some/directory-2 213 122 4
some/directory-3 100 258 5
some/directory-4 98 234 2
some/directory-5 0 295 3
some/directory-6 50 121 3
Args:
ann_file (str): Path to the annotation file.
pipeline (list[dict | callable]): A sequence of data transforms.
data_prefix (str | None): Path to a directory where videos are held.
Default: None.
test_mode (bool): Store True when building test or validation dataset.
Default: False.
filename_tmpl (str): Template for each filename.
Default: 'img_{:05}.jpg'.
with_offset (bool): Determines whether the offset information is in
ann_file. Default: False.
multi_class (bool): Determines whether it is a multi-class
recognition dataset. Default: False.
num_classes (int | None): Number of classes in the dataset.
Default: None.
modality (str): Modality of data. Support 'RGB', 'Flow'.
Default: 'RGB'.
sample_by_class (bool): Sampling by class, should be set `True` when
performing inter-class data balancing. Only compatible with
`multi_class == False`. Only applies for training. Default: False.
power (float): We support sampling data with the probability
proportional to the power of its label frequency (freq ^ power)
when sampling data. `power == 1` indicates uniformly sampling all
data; `power == 0` indicates uniformly sampling all classes.
Default: 0.
dynamic_length (bool): If the dataset length is dynamic (used by
ClassSpecificDistributedSampler). Default: False.
"""
def __init__(self,
ann_file,
pipeline,
data_prefix=None,
test_mode=False,
filename_tmpl='img_{:05}.jpg',
with_offset=False,
multi_class=False,
num_classes=None,
start_index=1,
modality='RGB',
sample_by_class=False,
power=0.,
dynamic_length=False):
self.filename_tmpl = filename_tmpl
self.with_offset = with_offset
super().__init__(
ann_file,
pipeline,
data_prefix,
test_mode,
multi_class,
num_classes,
start_index,
modality,
sample_by_class=sample_by_class,
power=power,
dynamic_length=dynamic_length)
def load_annotations(self):
"""Load annotation file to get video information."""
if self.ann_file.endswith('.json'):
return self.load_json_annotations()
video_infos = []
with open(self.ann_file, 'r') as fin:
for line in fin:
line_split = line.strip().split()
video_info = {}
idx = 0
# idx for frame_dir
frame_dir = line_split[idx]
if self.data_prefix is not None:
frame_dir = osp.join(self.data_prefix, frame_dir)
video_info['frame_dir'] = frame_dir
idx += 1
if self.with_offset:
# idx for offset and total_frames
video_info['offset'] = int(line_split[idx])
video_info['total_frames'] = int(line_split[idx + 1])
idx += 2
else:
# idx for total_frames
video_info['total_frames'] = int(line_split[idx])
idx += 1
# idx for label[s]
label = [int(x) for x in line_split[idx:]]
assert label, f'missing label in line: {line}'
if self.multi_class:
assert self.num_classes is not None
video_info['label'] = label
else:
assert len(label) == 1
video_info['label'] = label[0]
video_infos.append(video_info)
return video_infos
def prepare_train_frames(self, idx):
"""Prepare the frames for training given the index."""
results = copy.deepcopy(self.video_infos[idx])
results['filename_tmpl'] = self.filename_tmpl
results['modality'] = self.modality
results['start_index'] = self.start_index
# prepare tensor in getitem
if self.multi_class:
onehot = torch.zeros(self.num_classes)
onehot[results['label']] = 1.
results['label'] = onehot
return self.pipeline(results)
def prepare_test_frames(self, idx):
"""Prepare the frames for testing given the index."""
results = copy.deepcopy(self.video_infos[idx])
results['filename_tmpl'] = self.filename_tmpl
results['modality'] = self.modality
results['start_index'] = self.start_index
# prepare tensor in getitem
if self.multi_class:
onehot = torch.zeros(self.num_classes)
onehot[results['label']] = 1.
results['label'] = onehot
return self.pipeline(results)