Switch to side-by-side view

--- a
+++ b/tools/data/gym/download.py
@@ -0,0 +1,100 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+# This scripts is copied from
+# https://github.com/activitynet/ActivityNet/blob/master/Crawler/Kinetics/download.py  # noqa: E501
+# The code is licensed under the MIT licence.
+import argparse
+import os
+import ssl
+import subprocess
+
+import mmcv
+from joblib import Parallel, delayed
+
+ssl._create_default_https_context = ssl._create_unverified_context
+
+
+def download(video_identifier,
+             output_filename,
+             num_attempts=5,
+             url_base='https://www.youtube.com/watch?v='):
+    """Download a video from youtube if exists and is not blocked.
+    arguments:
+    ---------
+    video_identifier: str
+        Unique YouTube video identifier (11 characters)
+    output_filename: str
+        File path where the video will be stored.
+    """
+    # Defensive argument checking.
+    assert isinstance(video_identifier, str), 'video_identifier must be string'
+    assert isinstance(output_filename, str), 'output_filename must be string'
+    assert len(video_identifier) == 11, 'video_identifier must have length 11'
+
+    status = False
+
+    if not os.path.exists(output_filename):
+        command = [
+            'youtube-dl', '--quiet', '--no-warnings', '--no-check-certificate',
+            '-f', 'mp4', '-o',
+            '"%s"' % output_filename,
+            '"%s"' % (url_base + video_identifier)
+        ]
+        command = ' '.join(command)
+        print(command)
+        attempts = 0
+        while True:
+            try:
+                subprocess.check_output(
+                    command, shell=True, stderr=subprocess.STDOUT)
+            except subprocess.CalledProcessError:
+                attempts += 1
+                if attempts == num_attempts:
+                    return status, 'Fail'
+            else:
+                break
+    # Check if the video was successfully saved.
+    status = os.path.exists(output_filename)
+    return status, 'Downloaded'
+
+
+def download_wrapper(youtube_id, output_dir):
+    """Wrapper for parallel processing purposes."""
+    # we do this to align with names in annotations
+    output_filename = os.path.join(output_dir, youtube_id + '.mp4')
+    if os.path.exists(output_filename):
+        status = tuple([youtube_id, True, 'Exists'])
+        return status
+
+    downloaded, log = download(youtube_id, output_filename)
+    status = tuple([youtube_id, downloaded, log])
+    return status
+
+
+def main(input, output_dir, num_jobs=24):
+    # Reading and parsing ActivityNet.
+    youtube_ids = mmcv.load(input).keys()
+    # Creates folders where videos will be saved later.
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+    # Download all clips.
+    if num_jobs == 1:
+        status_list = []
+        for index in youtube_ids:
+            status_list.append(download_wrapper(index, output_dir))
+    else:
+        status_list = Parallel(n_jobs=num_jobs)(
+            delayed(download_wrapper)(index, output_dir)
+            for index in youtube_ids)
+
+    # Save download report.
+    mmcv.dump(status_list, 'download_report.json')
+
+
+if __name__ == '__main__':
+    description = 'Helper script for downloading GYM videos.'
+    p = argparse.ArgumentParser(description=description)
+    p.add_argument('input', type=str, help='The gym annotation file')
+    p.add_argument(
+        'output_dir', type=str, help='Output directory to save videos.')
+    p.add_argument('-n', '--num-jobs', type=int, default=24)
+    main(**vars(p.parse_args()))