[6d389a]: / mmaction / localization / ssn_utils.py

Download this file

170 lines (131 with data), 5.0 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
# Copyright (c) OpenMMLab. All rights reserved.
from itertools import groupby
import numpy as np
from ..core import average_precision_at_temporal_iou
from . import temporal_iou
def load_localize_proposal_file(filename):
    """Load the proposal file and split it into per-video records.

    Lines starting with ``#`` act as separators between videos; every run of
    consecutive non-``#`` lines is treated as the record of one video.

    Args:
        filename (str): Path to the proposal file.

    Returns:
        list[tuple]: One ``(video_id, num_frames, gt_boxes, proposal_boxes)``
            tuple per video, in file order.
    """
    # Read inside a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(filename) as f:
        lines = list(f)

    # Split the proposal file into many parts which contain one video's
    # information separately.
    groups = groupby(lines, lambda x: x.startswith('#'))
    video_infos = [[x.strip() for x in g] for k, g in groups if not k]

    def parse_group(video_info):
        """Parse the video's information.

        Template information of a video in a standard file:

            # index
            video_id
            num_frames
            fps
            num_gts
            label, start_frame, end_frame
            label, start_frame, end_frame
            ...
            num_proposals
            label, best_iou, overlap_self, start_frame, end_frame
            label, best_iou, overlap_self, start_frame, end_frame
            ...

        Example of a standard annotation file:

        .. code-block:: txt

            # 0
            video_validation_0000202
            5666
            1
            3
            8 130 185
            8 832 1136
            8 1303 1381
            5
            8 0.0620 0.0620 790 5671
            8 0.1656 0.1656 790 2619
            8 0.0833 0.0833 3945 5671
            8 0.0960 0.0960 4173 5671
            8 0.0614 0.0614 3327 5671

        Args:
            video_info (list): Stripped lines of one video's record (the
                ``# index`` separator line is not included).

        Returns:
            tuple[str, int, list, list]:
                video_id (str): Name of the video.
                num_frames (int): Product of the second and third lines of
                    the record (``num_frames * fps`` in the template),
                    truncated to an integer.
                gt_boxes (list): One list of whitespace-split string fields
                    per ground-truth line.
                proposal_boxes (list): One list of whitespace-split string
                    fields per proposal line.
        """
        video_id = video_info[0]
        num_frames = int(float(video_info[1]) * float(video_info[2]))
        num_gts = int(video_info[3])

        # The first four lines are the fixed header; the ground-truth lines
        # follow immediately.
        offset = 4
        gt_boxes = [x.split() for x in video_info[offset:offset + num_gts]]
        offset += num_gts

        num_proposals = int(video_info[offset])
        offset += 1
        proposal_boxes = [
            x.split() for x in video_info[offset:offset + num_proposals]
        ]

        return video_id, num_frames, gt_boxes, proposal_boxes

    return [parse_group(video_info) for video_info in video_infos]
def perform_regression(detections):
    """Refine segment boundaries using the regression columns.

    For every row, the segment ``[col 0, col 1]`` is re-centered by
    ``duration * col 3`` and its duration is rescaled by ``exp(col 4)``. The
    refined start and end are clipped to ``[0, 1]``.

    Args:
        detections (np.ndarray): 2-D array of detection results before
            regression. Columns 0 and 1 are the segment start and end,
            column 3 the center offset and column 4 the log duration scale.

    Returns:
        np.ndarray: Detection results after regression: the refined start
            and end columns followed by the unchanged input columns from
            index 2 onwards.
    """
    begin = detections[:, 0]
    finish = detections[:, 1]
    span = finish - begin
    midpoint = (begin + finish) / 2

    # Shift the center proportionally to the segment length and scale the
    # length exponentially.
    shifted_mid = midpoint + span * detections[:, 3]
    half_span = span * np.exp(detections[:, 4]) / 2

    left = np.clip(shifted_mid - half_span, 0, 1)
    right = np.clip(shifted_mid + half_span, 0, 1)

    return np.concatenate(
        (left[:, None], right[:, None], detections[:, 2:]), axis=1)
def temporal_nms(detections, threshold):
    """Apply greedy non-maximum suppression to temporal detections.

    Detections are visited in descending order of score (column 2). Each
    visited detection is kept, and every remaining detection whose value
    returned by ``temporal_iou`` against it exceeds ``threshold`` is dropped.

    Args:
        detections (np.ndarray): 2-D array of detection results before NMS.
            Columns 0, 1 and 2 are the segment start, end and score.
        threshold (float): Threshold of NMS. Remaining detections with an
            overlap less than or equal to this value survive.

    Returns:
        np.ndarray: Rows of ``detections`` kept after NMS, ordered by
            descending score.
    """
    seg_starts = detections[:, 0]
    seg_ends = detections[:, 1]
    seg_scores = detections[:, 2]

    remaining = seg_scores.argsort()[::-1]
    kept = []
    while remaining.size > 0:
        best, rest = remaining[0], remaining[1:]
        kept.append(best)
        # NOTE(review): temporal_iou is presumably the IoU of each remaining
        # segment against the selected one; confirm against its definition.
        overlaps = temporal_iou(seg_starts[rest], seg_ends[rest],
                                seg_starts[best], seg_ends[best])
        survivors = np.where(overlaps <= threshold)[0]
        remaining = rest[survivors]

    return detections[kept, :]
def eval_ap(detections, gt_by_cls, iou_range):
    """Compute the average precision of every class at every IoU threshold.

    Args:
        detections (dict): Results of detections, indexed by the class
            indices ``0 .. len(detections) - 1``.
        gt_by_cls (dict): Information of ground truth, indexed by the same
            class indices.
        iou_range (list): IoU thresholds to evaluate at.

    Returns:
        np.ndarray: Array of shape ``(len(detections), len(iou_range))``
            holding the average precision of each class at each threshold.
    """
    num_classes = len(detections)
    ap_values = np.zeros((num_classes, len(iou_range)))

    for col, iou_threshold in enumerate(iou_range):
        for cls in range(num_classes):
            # A single-element threshold list is passed per call.
            ap_values[cls, col] = average_precision_at_temporal_iou(
                gt_by_cls[cls], detections[cls], [iou_threshold])

    return ap_values