# [6d389a] tools/data/ava/download_videos_parallel.py
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os.path as osp
import subprocess
import mmcv
from joblib import Parallel, delayed
URL_PREFIX = 'https://s3.amazonaws.com/ava-dataset/trainval/'
def download_video(video_url, output_dir, num_attempts=5):
    """Download one video with ``wget`` unless it is already on disk.

    Args:
        video_url (str): Full URL of the video. Its basename is used as the
            expected local file name.
        output_dir (str): Directory handed to ``wget -P``.
        num_attempts (int): Maximum number of ``wget`` invocations before
            giving up. Values below 1 behave like a single attempt.
            Default: 5.

    Returns:
        tuple[bool, str]: ``(status, message)``. ``status`` is whether the
        expected file exists in ``output_dir``; ``message`` is
        ``'Downloaded'`` or, once all attempts failed,
        ``'Downloading Failed'``.
    """
    video_file = osp.basename(video_url)
    output_file = osp.join(output_dir, video_file)

    if not osp.exists(output_file):
        # Run wget from an argument list without a shell so that spaces or
        # shell metacharacters in the URL / directory cannot change the
        # command that is executed.
        command = ['wget', '-c', video_url, '-P', output_dir]
        print(' '.join(command))
        attempts = 0
        while True:
            try:
                subprocess.check_output(command, stderr=subprocess.STDOUT)
            except (subprocess.CalledProcessError, OSError):
                # OSError covers a missing or non-executable ``wget``; with
                # a shell this surfaced as a non-zero exit status instead.
                attempts += 1
                # ``>=`` keeps num_attempts <= 0 from retrying forever.
                if attempts >= num_attempts:
                    return False, 'Downloading Failed'
            else:
                break

    # Report the actual on-disk state instead of assuming the expected file
    # name was produced.
    status = osp.exists(output_file)
    return status, 'Downloaded'
def main(source_file, output_dir, num_jobs=24, num_attempts=5):
    """Download every video listed in ``source_file`` and dump a report.

    Args:
        source_file (str): Text file with one video file name per line.
            Blank lines are skipped.
        output_dir (str): Directory the videos are saved to. It is passed
            to ``mmcv.mkdir_or_exist`` before any download starts.
        num_jobs (int): Number of joblib workers. ``1`` downloads the videos
            one by one in the current process. Default: 24.
        num_attempts (int): Maximum download attempts per video. Default: 5.
    """
    mmcv.mkdir_or_exist(output_dir)

    # Close the list file deterministically and drop blank lines / stray
    # whitespace so they do not turn into bogus URLs.
    with open(source_file) as f:
        names = [line.strip() for line in f]
    video_list = [osp.join(URL_PREFIX, name) for name in names if name]

    if num_jobs == 1:
        # Collect results in status_list; appending to video_list while
        # iterating it would never terminate and would leave the report empty.
        status_list = [
            download_video(video, output_dir, num_attempts)
            for video in video_list
        ]
    else:
        status_list = Parallel(n_jobs=num_jobs)(
            delayed(download_video)(video, output_dir, num_attempts)
            for video in video_list)

    # One (status, message) entry per video, in list-file order.
    mmcv.dump(status_list, 'download_report.json')
if __name__ == '__main__':
    # Command-line entry point: parse the options and forward them to main().
    cli = argparse.ArgumentParser(
        description='Helper script for downloading AVA videos')

    # Positional arguments, registered in the order they must be given.
    positional_help = (
        ('source_file', 'TXT file containing the video filename'),
        ('output_dir', 'Output directory where videos will be saved'),
    )
    for arg_name, help_text in positional_help:
        cli.add_argument(arg_name, type=str, help=help_text)

    # Optional tuning knobs.
    cli.add_argument('-n', '--num-jobs', type=int, default=24)
    cli.add_argument('--num-attempts', type=int, default=5)

    options = cli.parse_args()
    main(
        source_file=options.source_file,
        output_dir=options.output_dir,
        num_jobs=options.num_jobs,
        num_attempts=options.num_attempts)