[6d389a]: / tools / data / gym / download.py

Download this file

101 lines (86 with data), 3.5 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
# Copyright (c) OpenMMLab. All rights reserved.
# This scripts is copied from
# https://github.com/activitynet/ActivityNet/blob/master/Crawler/Kinetics/download.py # noqa: E501
# The code is licensed under the MIT licence.
import argparse
import os
import ssl
import subprocess
import mmcv
from joblib import Parallel, delayed
ssl._create_default_https_context = ssl._create_unverified_context
def download(video_identifier,
output_filename,
num_attempts=5,
url_base='https://www.youtube.com/watch?v='):
"""Download a video from youtube if exists and is not blocked.
arguments:
---------
video_identifier: str
Unique YouTube video identifier (11 characters)
output_filename: str
File path where the video will be stored.
"""
# Defensive argument checking.
assert isinstance(video_identifier, str), 'video_identifier must be string'
assert isinstance(output_filename, str), 'output_filename must be string'
assert len(video_identifier) == 11, 'video_identifier must have length 11'
status = False
if not os.path.exists(output_filename):
command = [
'youtube-dl', '--quiet', '--no-warnings', '--no-check-certificate',
'-f', 'mp4', '-o',
'"%s"' % output_filename,
'"%s"' % (url_base + video_identifier)
]
command = ' '.join(command)
print(command)
attempts = 0
while True:
try:
subprocess.check_output(
command, shell=True, stderr=subprocess.STDOUT)
except subprocess.CalledProcessError:
attempts += 1
if attempts == num_attempts:
return status, 'Fail'
else:
break
# Check if the video was successfully saved.
status = os.path.exists(output_filename)
return status, 'Downloaded'
def download_wrapper(youtube_id, output_dir):
"""Wrapper for parallel processing purposes."""
# we do this to align with names in annotations
output_filename = os.path.join(output_dir, youtube_id + '.mp4')
if os.path.exists(output_filename):
status = tuple([youtube_id, True, 'Exists'])
return status
downloaded, log = download(youtube_id, output_filename)
status = tuple([youtube_id, downloaded, log])
return status
def main(input, output_dir, num_jobs=24):
# Reading and parsing ActivityNet.
youtube_ids = mmcv.load(input).keys()
# Creates folders where videos will be saved later.
if not os.path.exists(output_dir):
os.makedirs(output_dir)
# Download all clips.
if num_jobs == 1:
status_list = []
for index in youtube_ids:
status_list.append(download_wrapper(index, output_dir))
else:
status_list = Parallel(n_jobs=num_jobs)(
delayed(download_wrapper)(index, output_dir)
for index in youtube_ids)
# Save download report.
mmcv.dump(status_list, 'download_report.json')
if __name__ == '__main__':
description = 'Helper script for downloading GYM videos.'
p = argparse.ArgumentParser(description=description)
p.add_argument('input', type=str, help='The gym annotation file')
p.add_argument(
'output_dir', type=str, help='Output directory to save videos.')
p.add_argument('-n', '--num-jobs', type=int, default=24)
main(**vars(p.parse_args()))