Surgical-Bleeding-AMAGI / Git / [6d389a] /tools/data/activitynet/download.py

Models:
DavidFeaster/
Surgical-Bleeding-AMAGI
Downloads: 1
[6d389a]: / tools / data / activitynet / download.py
History
Download this file
149 lines (126 with data), 4.9 kB

# Copyright (c) OpenMMLab. All rights reserved.
# This scripts is copied from
# https://github.com/activitynet/ActivityNet/blob/master/Crawler/Kinetics/download.py  # noqa: E501
# The code is licensed under the MIT licence.
import argparse
import os
import ssl
import subprocess

import mmcv
from joblib import Parallel, delayed

ssl._create_default_https_context = ssl._create_unverified_context
data_file = '../../../data/ActivityNet'
output_dir = f'{data_file}/videos'


def parse_args():
    parser = argparse.ArgumentParser(description='ActivityNet downloader')
    parser.add_argument(
        '--bsn',
        action='store_true',
        help='download for BSN annotation or official one')
    args = parser.parse_args()
    return args


def download_clip(video_identifier,
                  output_filename,
                  num_attempts=5,
                  url_base='https://www.youtube.com/watch?v='):
    """Download a video from youtube if exists and is not blocked.
    arguments:
    ---------
    video_identifier: str
        Unique YouTube video identifier (11 characters)
    output_filename: str
        File path where the video will be stored.
    """
    # Defensive argument checking.
    assert isinstance(video_identifier, str), 'video_identifier must be string'
    assert isinstance(output_filename, str), 'output_filename must be string'
    assert len(video_identifier) == 11, 'video_identifier must have length 11'

    status = False

    if not os.path.exists(output_filename):
        command = [
            'youtube-dl', '--quiet', '--no-warnings', '--no-check-certificate',
            '-f', 'mp4', '-o',
            '"%s"' % output_filename,
            '"%s"' % (url_base + video_identifier)
        ]
        command = ' '.join(command)
        print(command)
        attempts = 0
        while True:
            try:
                subprocess.check_output(
                    command, shell=True, stderr=subprocess.STDOUT)
            except subprocess.CalledProcessError:
                attempts += 1
                if attempts == num_attempts:
                    return status, 'Fail'
            else:
                break
    # Check if the video was successfully saved.
    status = os.path.exists(output_filename)
    return status, 'Downloaded'


def download_clip_wrapper(youtube_id, output_dir):
    """Wrapper for parallel processing purposes."""
    # we do this to align with names in annotations
    output_filename = os.path.join(output_dir, 'v_' + youtube_id + '.mp4')
    if os.path.exists(output_filename):
        status = tuple(['v_' + youtube_id, True, 'Exists'])
        return status

    downloaded, log = download_clip(youtube_id, output_filename)
    status = tuple(['v_' + youtube_id, downloaded, log])
    return status


def parse_activitynet_annotations(input_csv, is_bsn_case=False):
    """Returns a list of YoutubeID.
    arguments:
    ---------
    input_csv: str
        Path to CSV file containing the following columns:
          'video,numFrame,seconds,fps,rfps,subset,featureFrame'
    returns:
    -------
    youtube_ids: list
        List of all YoutubeIDs in ActivityNet.

    """
    if is_bsn_case:
        lines = open(input_csv).readlines()
        lines = lines[1:]
        # YoutubeIDs do not have prefix `v_`
        youtube_ids = [x.split(',')[0][2:] for x in lines]
    else:
        data = mmcv.load(anno_file)['database']
        youtube_ids = list(data.keys())

    return youtube_ids


def main(input_csv, output_dir, anno_file, num_jobs=24, is_bsn_case=False):
    # Reading and parsing ActivityNet.
    youtube_ids = parse_activitynet_annotations(input_csv, is_bsn_case)

    # Creates folders where videos will be saved later.
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    # Download all clips.
    if num_jobs == 1:
        status_list = []
        for index in youtube_ids:
            status_list.append(download_clip_wrapper(index, output_dir))
    else:
        status_list = Parallel(n_jobs=num_jobs)(
            delayed(download_clip_wrapper)(index, output_dir)
            for index in youtube_ids)

    # Save download report.
    mmcv.dump(status_list, 'download_report.json')
    annotation = mmcv.load(anno_file)
    downloaded = {status[0]: status[1] for status in status_list}
    annotation = {k: v for k, v in annotation.items() if downloaded[k]}

    if is_bsn_case:
        anno_file_bak = anno_file.replace('.json', '_bak.json')
        os.rename(anno_file, anno_file_bak)
        mmcv.dump(annotation, anno_file)


if __name__ == '__main__':
    args = parse_args()
    is_bsn_case = args.bsn
    if is_bsn_case:
        video_list = f'{data_file}/video_info_new.csv'
        anno_file = f'{data_file}/anet_anno_action.json'
    else:
        video_list = f'{data_file}/activity_net.v1-3.min.json'
        anno_file = video_list
    main(video_list, output_dir, anno_file, 24, is_bsn_case)