# Copyright (c) OpenMMLab. All rights reserved.
from itertools import groupby
import numpy as np
from ..core import average_precision_at_temporal_iou
from . import temporal_iou
def load_localize_proposal_file(filename):
    """Load the proposal file and split it into many parts which contain one
    video's information separately.

    Lines starting with ``#`` act as separators between videos; every run of
    consecutive non-``#`` lines is parsed as one video record.

    Args:
        filename(str): Path to the proposal file.

    Returns:
        list: List of all videos' information. Each element is the tuple
            ``(video_id, num_frames, gt_boxes, proposal_boxes)`` produced
            for one video record.
    """
    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open(filename) as f:
        lines = f.readlines()

    # Split the proposal file into many parts which contain one video's
    # information separately.
    groups = groupby(lines, lambda x: x.startswith('#'))
    video_infos = [[x.strip() for x in g] for k, g in groups if not k]

    def parse_group(video_info):
        """Parse the video's information.

        Template information of a video in a standard file:
            # index
            video_id
            num_frames
            fps
            num_gts
            label, start_frame, end_frame
            label, start_frame, end_frame
            ...
            num_proposals
            label, best_iou, overlap_self, start_frame, end_frame
            label, best_iou, overlap_self, start_frame, end_frame
            ...

        Example of a standard annotation file:

        .. code-block:: txt

            # 0
            video_validation_0000202
            5666
            1
            3
            8 130 185
            8 832 1136
            8 1303 1381
            5
            8 0.0620 0.0620 790 5671
            8 0.1656 0.1656 790 2619
            8 0.0833 0.0833 3945 5671
            8 0.0960 0.0960 4173 5671
            8 0.0614 0.0614 3327 5671

        Args:
            video_info (list): Information of the video (the stripped lines
                of one record, without the leading ``#`` line).

        Returns:
            tuple[str, int, list, list]:
                video_id (str): Name of the video.
                num_frames (int): Number of frames in the video, computed as
                    ``int(float(line 1) * float(line 2))`` of the record.
                gt_boxes (list): List of the information of gt boxes; each
                    box is the list of whitespace-separated string fields.
                proposal_boxes (list): List of the information of
                    proposal boxes, in the same string-field form.
        """
        video_id = video_info[0]
        num_frames = int(float(video_info[1]) * float(video_info[2]))
        num_gts = int(video_info[3])

        # The fixed header occupies the first four lines; ground-truth
        # boxes start right after it.
        offset = 4
        gt_boxes = [x.split() for x in video_info[offset:offset + num_gts]]
        offset += num_gts

        # The proposal count follows the ground-truth boxes.
        num_proposals = int(video_info[offset])
        offset += 1
        proposal_boxes = [
            x.split() for x in video_info[offset:offset + num_proposals]
        ]

        return video_id, num_frames, gt_boxes, proposal_boxes

    return [parse_group(video_info) for video_info in video_infos]
def perform_regression(detections):
    """Refine detection boundaries with their regression offsets.

    Column 0 and column 1 of ``detections`` are read as the start and end of
    each segment, column 3 as a center shift expressed in units of the
    segment duration, and column 4 as a log-scale factor applied to the
    duration. The refined start/end values are clipped to ``[0, 1]``.

    Args:
        detections (np.ndarray): 2-D array of detection results before
            regression, with at least 5 columns.

    Returns:
        np.ndarray: Detection results after regression. The first two
            columns hold the refined start and end; the remaining columns
            are ``detections[:, 2:]`` passed through unchanged.
    """
    left, right = detections[:, 0], detections[:, 1]
    span = right - left
    midpoint = (left + right) / 2

    # Shift the center proportionally to the duration, then rescale the
    # duration exponentially.
    shifted_mid = midpoint + span * detections[:, 3]
    half_span = span * np.exp(detections[:, 4]) / 2

    refined_start = np.clip(shifted_mid - half_span, 0, 1)
    refined_end = np.clip(shifted_mid + half_span, 0, 1)

    return np.concatenate(
        (refined_start[:, None], refined_end[:, None], detections[:, 2:]),
        axis=1)
def temporal_nms(detections, threshold):
    """Apply greedy temporal non-maximum suppression to detections.

    Detections are visited in descending order of the value in column 2.
    Each visited detection is kept, and every remaining detection whose
    temporal IoU with it (as computed by ``temporal_iou``) exceeds
    ``threshold`` is discarded.

    Args:
        detections (np.ndarray): 2-D array of detection results before NMS;
            columns 0, 1 and 2 are read as start, end and score.
        threshold (float): Threshold of NMS. Candidates with IoU less than
            or equal to this value survive.

    Returns:
        np.ndarray: Detection results after NMS, i.e. the kept rows of
            ``detections`` in the order they were selected.
    """
    seg_starts = detections[:, 0]
    seg_ends = detections[:, 1]
    seg_scores = detections[:, 2]

    # Candidate indices, best score first.
    remaining = seg_scores.argsort()[::-1]
    kept = []
    while remaining.size > 0:
        top, rest = remaining[0], remaining[1:]
        kept.append(top)
        overlaps = temporal_iou(seg_starts[rest], seg_ends[rest],
                                seg_starts[top], seg_ends[top])
        # Only candidates that do not overlap the kept one too much
        # continue to the next round.
        survivors = np.where(overlaps <= threshold)[0]
        remaining = rest[survivors]

    return detections[kept, :]
def eval_ap(detections, gt_by_cls, iou_range):
    """Evaluate average precisions.

    For every IoU threshold in ``iou_range`` and every class index in
    ``range(len(detections))``, ``average_precision_at_temporal_iou`` is
    called with that class's ground truth, that class's detections and a
    single-element threshold list.

    Args:
        detections (dict): Results of detections, indexed by the integer
            class index ``0 .. len(detections) - 1``.
        gt_by_cls (dict): Information of ground truth, indexed by the same
            class index.
        iou_range (list): Ranges of iou.

    Returns:
        np.ndarray: Average precision values of classes at ious, with shape
            ``(len(detections), len(iou_range))``.
    """
    num_classes = len(detections)
    ap_values = np.zeros((num_classes, len(iou_range)))

    for col, iou_thr in enumerate(iou_range):
        for cls in range(num_classes):
            ap_values[cls, col] = average_precision_at_temporal_iou(
                gt_by_cls[cls], detections[cls], [iou_thr])

    return ap_values