--- a +++ b/utils/downloads.py @@ -0,0 +1,127 @@ +# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license +""" +Download utils +""" + +import logging +import subprocess +import urllib +from pathlib import Path + +import requests +import torch + + +def is_url(url, check=True): + # Check if string is URL and check if URL exists + try: + url = str(url) + result = urllib.parse.urlparse(url) + assert all([result.scheme, result.netloc]) # check if is url + return (urllib.request.urlopen(url).getcode() == 200) if check else True # check if exists online + except (AssertionError, urllib.request.HTTPError): + return False + + +def gsutil_getsize(url=''): + # gs://bucket/file size https://cloud.google.com/storage/docs/gsutil/commands/du + output = subprocess.check_output(['gsutil', 'du', url], shell=True, encoding='utf-8') + if output: + return int(output.split()[0]) + return 0 + + +def url_getsize(url='https://ultralytics.com/images/bus.jpg'): + # Return downloadable file size in bytes + response = requests.head(url, allow_redirects=True) + return int(response.headers.get('content-length', -1)) + + +def curl_download(url, filename, *, silent: bool = False) -> bool: + """ + Download a file from a url to a filename using curl. + """ + silent_option = 'sS' if silent else '' # silent + proc = subprocess.run([ + 'curl', + '-#', + f'-{silent_option}L', + url, + '--output', + filename, + '--retry', + '9', + '-C', + '-', ]) + return proc.returncode == 0 + + +def safe_download(file, url, url2=None, min_bytes=1E0, error_msg=''): + # Attempts to download file from url or url2, checks and removes incomplete downloads < min_bytes + from utils.general import LOGGER + + file = Path(file) + assert_msg = f"Downloaded file '{file}' does not exist or size is < min_bytes={min_bytes}" + try: # url1 + LOGGER.info(f'Downloading {url} to {file}...') + torch.hub.download_url_to_file(url, str(file), progress=LOGGER.level <= logging.INFO) + assert file.exists() and file.stat().st_size > min_bytes, assert_msg # check + except Exception as e: # url2 + if file.exists(): + file.unlink() # remove partial downloads + LOGGER.info(f'ERROR: {e}\nRe-attempting {url2 or url} to {file}...') + # curl download, retry and resume on fail + curl_download(url2 or url, file) + finally: + if not file.exists() or file.stat().st_size < min_bytes: # check + if file.exists(): + file.unlink() # remove partial downloads + LOGGER.info(f'ERROR: {assert_msg}\n{error_msg}') + LOGGER.info('') + + +def attempt_download(file, repo='ultralytics/yolov5', release='v7.0'): + # Attempt file download from GitHub release assets if not found locally. release = 'latest', 'v7.0', etc. + from utils.general import LOGGER + + def github_assets(repository, version='latest'): + # Return GitHub repo tag (i.e. 'v7.0') and assets (i.e. ['yolov5s.pt', 'yolov5m.pt', ...]) + if version != 'latest': + version = f'tags/{version}' # i.e. tags/v7.0 + response = requests.get(f'https://api.github.com/repos/{repository}/releases/{version}').json() # github api + return response['tag_name'], [x['name'] for x in response['assets']] # tag, assets + + file = Path(str(file).strip().replace("'", '')) + if not file.exists(): + # URL specified + name = Path(urllib.parse.unquote(str(file))).name # decode '%2F' to '/' etc. + if str(file).startswith(('http:/', 'https:/')): # download + url = str(file).replace(':/', '://') # Pathlib turns :// -> :/ + file = name.split('?')[0] # parse authentication https://url.com/file.txt?auth... + if Path(file).is_file(): + LOGGER.info(f'Found {url} locally at {file}') # file already exists + else: + safe_download(file=file, url=url, min_bytes=1E5) + return file + + # GitHub assets + assets = [f'yolov5{size}{suffix}.pt' for size in 'nsmlx' for suffix in ('', '6', '-cls', '-seg')] # default + try: + tag, assets = github_assets(repo, release) + except Exception: + try: + tag, assets = github_assets(repo) # latest release + except Exception: + try: + tag = subprocess.check_output('git tag', shell=True, stderr=subprocess.STDOUT).decode().split()[-1] + except Exception: + tag = release + + if name in assets: + file.parent.mkdir(parents=True, exist_ok=True) # make parent dir (if required) + safe_download(file, + url=f'https://github.com/{repo}/releases/download/{tag}/{name}', + min_bytes=1E5, + error_msg=f'{file} missing, try downloading from https://github.com/{repo}/releases/{tag}') + + return str(file)