# Copyright (c) OpenMMLab. All rights reserved.
import copy
import os
import os.path as osp
import warnings
from collections import OrderedDict

import mmcv
import numpy as np

from ..core import average_recall_at_avg_proposals
from .base import BaseDataset
from .builder import DATASETS


@DATASETS.register_module()
class ActivityNetDataset(BaseDataset):
"""ActivityNet dataset for temporal action localization.
The dataset loads raw features and apply specified transforms to return a
dict containing the frame tensors and other information.
The ann_file is a json file with multiple objects, and each object has a
key of the name of a video, and value of total frames of the video, total
seconds of the video, annotations of a video, feature frames (frames
covered by features) of the video, fps and rfps. Example of a
annotation file:
.. code-block:: JSON
{
"v_--1DO2V4K74": {
"duration_second": 211.53,
"duration_frame": 6337,
"annotations": [
{
"segment": [
30.025882995319815,
205.2318595943838
],
"label": "Rock climbing"
}
],
"feature_frame": 6336,
"fps": 30.0,
"rfps": 29.9579255898
},
"v_--6bJUbfpnQ": {
"duration_second": 26.75,
"duration_frame": 647,
"annotations": [
{
"segment": [
2.578755070202808,
24.914101404056165
],
"label": "Drinking beer"
}
],
"feature_frame": 624,
"fps": 24.0,
"rfps": 24.1869158879
},
...
}

    Args:
ann_file (str): Path to the annotation file.
pipeline (list[dict | callable]): A sequence of data transforms.
data_prefix (str | None): Path to a directory where videos are held.
Default: None.
test_mode (bool): Store True when building test or validation dataset.
Default: False.
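
    Example:
        A minimal instantiation sketch. The file names below are
        placeholders and the empty pipeline is only for illustration; real
        configs pass the full list of transforms:

        .. code-block:: Python

            dataset = ActivityNetDataset(
                ann_file='activitynet_anno.json',
                pipeline=[],
                data_prefix='data/activitynet_feature')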
"""

    def __init__(self, ann_file, pipeline, data_prefix=None, test_mode=False):
        super().__init__(ann_file, pipeline, data_prefix, test_mode)

def load_annotations(self):
"""Load the annotation according to ann_file into video_infos."""
video_infos = []
anno_database = mmcv.load(self.ann_file)
for video_name in anno_database:
video_info = anno_database[video_name]
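            # Keep the json key in the info dict; ``_import_ground_truth``
            # and the pipeline read it back as ``video_name``.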
video_info['video_name'] = video_name
video_infos.append(video_info)
return video_infos

    def prepare_test_frames(self, idx):
"""Prepare the frames for testing given the index."""
results = copy.deepcopy(self.video_infos[idx])
results['data_prefix'] = self.data_prefix
return self.pipeline(results)

    def prepare_train_frames(self, idx):
"""Prepare the frames for training given the index."""
results = copy.deepcopy(self.video_infos[idx])
results['data_prefix'] = self.data_prefix
return self.pipeline(results)

    def __len__(self):
"""Get the size of the dataset."""
return len(self.video_infos)

    def _import_ground_truth(self):
        """Read ground truth data from video_infos.

        Returns:
            dict: Ground truth keyed by video id (the ``v_`` prefix of the
                video name is stripped), where each value is an array of
                ``[t_start, t_end, label]`` rows.
        """
ground_truth = {}
for video_info in self.video_infos:
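            # ActivityNet video names carry a 'v_' prefix; strip it so the
            # keys match the ids produced by ``_import_proposals``.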
video_id = video_info['video_name'][2:]
this_video_ground_truths = []
for ann in video_info['annotations']:
t_start, t_end = ann['segment']
label = ann['label']
this_video_ground_truths.append([t_start, t_end, label])
ground_truth[video_id] = np.array(this_video_ground_truths)
return ground_truth

    @staticmethod
    def proposals2json(results, show_progress=False):
        """Convert all proposals to a final dict(json) format.

        Args:
            results (list[dict]): All proposals.
            show_progress (bool): Whether to show the progress bar.
                Default: False.

        Returns:
            dict: The final result dict. E.g.

            .. code-block:: Python

                dict(video-1=[dict(segment=[1.1, 2.0], score=0.9),
                              dict(segment=[50.1, 129.3], score=0.6)])
        """
result_dict = {}
        if show_progress:
            print('Convert proposals to json format')
            prog_bar = mmcv.ProgressBar(len(results))
for result in results:
video_name = result['video_name']
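            # Key the dict by the plain video id, i.e. the video name without
            # its 'v_' prefix.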
result_dict[video_name[2:]] = result['proposal_list']
if show_progress:
prog_bar.update()
return result_dict

    @staticmethod
def _import_proposals(results):
"""Read predictions from results."""
proposals = {}
num_proposals = 0
for result in results:
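            # Use the same prefix-stripped video id as the ground truth dict.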
video_id = result['video_name'][2:]
this_video_proposals = []
for proposal in result['proposal_list']:
t_start, t_end = proposal['segment']
score = proposal['score']
this_video_proposals.append([t_start, t_end, score])
num_proposals += 1
proposals[video_id] = np.array(this_video_proposals)
return proposals, num_proposals

    def dump_results(self, results, out, output_format, version='VERSION 1.3'):
        """Dump data to json/csv files.

        Args:
            results (list): Output results to be dumped.
            out (str): Path of the dumped json file (for json output) or of
                the output directory (for csv output).
            output_format (str): The dump format, 'json' or 'csv'.
            version (str): Version string recorded in the json output.
                Default: 'VERSION 1.3'.
        """
if output_format == 'json':
result_dict = self.proposals2json(results)
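            # Mirror the ActivityNet submission layout: a version string, the
            # per-video results and an (empty) external_data field.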
output_dict = {
'version': version,
'results': result_dict,
'external_data': {}
}
mmcv.dump(output_dict, out)
elif output_format == 'csv':
# TODO: add csv handler to mmcv and use mmcv.dump
os.makedirs(out, exist_ok=True)
header = 'action,start,end,tmin,tmax'
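            # One csv per video; each ``result`` is expected to unpack into a
            # (video_name, outputs) pair whose columns match ``header``.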
for result in results:
video_name, outputs = result
output_path = osp.join(out, video_name + '.csv')
np.savetxt(
output_path,
outputs,
header=header,
delimiter=',',
comments='')
else:
raise ValueError(
f'The output format {output_format} is not supported.')

    def evaluate(
self,
results,
metrics='AR@AN',
metric_options={
'AR@AN':
dict(
max_avg_proposals=100,
temporal_iou_thresholds=np.linspace(0.5, 0.95, 10))
},
logger=None,
**deprecated_kwargs):
"""Evaluation in feature dataset.
Args:
results (list[dict]): Output results.
metrics (str | sequence[str]): Metrics to be performed.
Defaults: 'AR@AN'.
metric_options (dict): Dict for metric options. Options are
``max_avg_proposals``, ``temporal_iou_thresholds`` for
``AR@AN``.
default: ``{'AR@AN': dict(max_avg_proposals=100,
temporal_iou_thresholds=np.linspace(0.5, 0.95, 10))}``.
logger (logging.Logger | None): Training logger. Defaults: None.
deprecated_kwargs (dict): Used for containing deprecated arguments.
See 'https://github.com/open-mmlab/mmaction2/pull/286'.
Returns:
dict: Evaluation results for evaluation metrics.
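
        Example:
            A hedged usage sketch: ``results`` stands for the proposal list
            produced by a localizer and the option values are illustrative
            only.

            .. code-block:: Python

                eval_results = dataset.evaluate(
                    results,
                    metrics='AR@AN',
                    metric_options={
                        'AR@AN': dict(
                            max_avg_proposals=100,
                            temporal_iou_thresholds=[0.5, 0.75, 0.95])
                    })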
"""
# Protect ``metric_options`` since it uses mutable value as default
metric_options = copy.deepcopy(metric_options)
        if deprecated_kwargs:
            warnings.warn(
                'Option arguments for metrics have been changed to '
                "`metric_options`, see 'https://github.com/open-mmlab/mmaction2/pull/286' "  # noqa: E501
                'for more details')
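            # Fold the deprecated keyword arguments into the 'AR@AN' options.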
metric_options['AR@AN'] = dict(metric_options['AR@AN'],
**deprecated_kwargs)
if not isinstance(results, list):
raise TypeError(f'results must be a list, but got {type(results)}')
assert len(results) == len(self), (
f'The length of results is not equal to the dataset len: '
f'{len(results)} != {len(self)}')
metrics = metrics if isinstance(metrics, (list, tuple)) else [metrics]
allowed_metrics = ['AR@AN']
for metric in metrics:
if metric not in allowed_metrics:
raise KeyError(f'metric {metric} is not supported')
eval_results = OrderedDict()
ground_truth = self._import_ground_truth()
        proposals, num_proposals = self._import_proposals(results)
for metric in metrics:
if metric == 'AR@AN':
temporal_iou_thresholds = metric_options.setdefault(
'AR@AN', {}).setdefault('temporal_iou_thresholds',
np.linspace(0.5, 0.95, 10))
max_avg_proposals = metric_options.setdefault(
'AR@AN', {}).setdefault('max_avg_proposals', 100)
if isinstance(temporal_iou_thresholds, list):
temporal_iou_thresholds = np.array(temporal_iou_thresholds)
                recall, _, _, auc = average_recall_at_avg_proposals(
                    ground_truth,
                    proposals,
                    num_proposals,
                    max_avg_proposals=max_avg_proposals,
                    temporal_iou_thresholds=temporal_iou_thresholds)
eval_results['auc'] = auc
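                # ``recall`` has one row per temporal iou threshold and one
                # column per average number of proposals, so the column-wise
                # mean at index k - 1 gives AR@k.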
eval_results['AR@1'] = np.mean(recall[:, 0])
eval_results['AR@5'] = np.mean(recall[:, 4])
eval_results['AR@10'] = np.mean(recall[:, 9])
eval_results['AR@100'] = np.mean(recall[:, 99])
return eval_results