# Copyright (c) OpenMMLab. All rights reserved.
"""Extract the audio track of every video under a directory tree as WAV.

The directory layout below ``root`` is mirrored below ``dst_root``; each
``<name>.<ext>`` video produces ``<name>.wav``. FFmpeg must be on ``PATH``.
"""
import argparse
import glob
import os
import os.path as osp
import subprocess
from functools import partial
from multiprocessing import Pool

# Videos whose extraction failed are appended to this file, one per line.
ERR_FILE = 'extract_wav_err_file.txt'


def extract_audio_wav(line, root=None, dst_root=None):
    """Extract the audio wave from a video stream using FFMPEG.

    Args:
        line (str): Path of the source video file.
        root (str | None): Source video directory that ``line`` lives under.
            Defaults to ``args.root`` of the module-level ``args`` namespace
            set by the script entry point.
        dst_root (str | None): Output audio directory. Defaults to
            ``args.dst_root`` of the module-level ``args`` namespace.

    Returns:
        None. On failure (output directory cannot be created, FFmpeg cannot
        be started, or FFmpeg exits with a non-zero status) ``line`` is
        appended to ``extract_wav_err_file.txt`` in the current directory.
    """
    # Fall back to the namespace parsed by the script entry point so that
    # existing single-argument callers keep working.
    if root is None:
        root = args.root
    if dst_root is None:
        dst_root = args.dst_root

    video_id, _ = osp.splitext(osp.basename(line))
    video_rel_dir = osp.relpath(osp.dirname(line), root)
    dst_dir = osp.join(dst_root, video_rel_dir)
    dst_file = osp.join(dst_dir, f'{video_id}.wav')

    try:
        # Create the directory in-process: no shell quoting issues and the
        # directory is guaranteed to exist before FFmpeg starts.
        os.makedirs(dst_dir, exist_ok=True)
        if osp.exists(dst_file):
            return
        # An argument list (no shell) keeps paths with spaces or shell
        # metacharacters intact and also works for absolute paths.
        cmd = ['ffmpeg', '-i', line, '-map', '0:a', '-y', dst_file]
        # Block until FFmpeg finishes; ``check=True`` turns a non-zero exit
        # status into an exception so the failure is actually recorded.
        subprocess.run(cmd, stdin=subprocess.DEVNULL, check=True)
    except (OSError, subprocess.CalledProcessError):
        with open(ERR_FILE, 'a+') as f:
            f.write(f'{line}\n')


def parse_args():
    """Parse the command line arguments of the script.

    Returns:
        argparse.Namespace: Namespace with the attributes ``root``,
        ``dst_root``, ``level``, ``ext`` and ``num_worker``.
    """
    parser = argparse.ArgumentParser(description='Extract audios')
    parser.add_argument('root', type=str, help='source video directory')
    parser.add_argument('dst_root', type=str, help='output audio directory')
    parser.add_argument(
        '--level', type=int, default=2, help='directory level of data')
    parser.add_argument(
        '--ext',
        type=str,
        default='mp4',
        choices=['avi', 'mp4', 'webm'],
        help='video file extensions')
    parser.add_argument(
        '--num-worker', type=int, default=8, help='number of workers')
    args = parser.parse_args()

    return args


if __name__ == '__main__':
    args = parse_args()

    os.makedirs(args.dst_root, exist_ok=True)

    print('Reading videos from folder: ', args.root)
    print('Extension of videos: ', args.ext)
    fullpath_list = glob.glob(args.root + '/*' * args.level + '.' + args.ext)
    done_fullpath_list = glob.glob(args.dst_root + '/*' * args.level + '.wav')
    print('Total number of videos found: ', len(fullpath_list))
    print('Total number of videos extracted finished: ',
          len(done_fullpath_list))

    # Bind the directories explicitly instead of relying on the workers
    # inheriting the global ``args``; that only happens with the ``fork``
    # start method and fails under ``spawn``.
    worker = partial(
        extract_audio_wav, root=args.root, dst_root=args.dst_root)
    with Pool(args.num_worker) as pool:
        pool.map(worker, fullpath_list)