Diff of /utils/downloads.py [000000] .. [190ca4]

Switch to side-by-side view

--- a
+++ b/utils/downloads.py
@@ -0,0 +1,127 @@
+# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
+"""
+Download utils
+"""
+
+import logging
+import subprocess
+import urllib
+from pathlib import Path
+
+import requests
+import torch
+
+
+def is_url(url, check=True):
+    """
+    Return True if `url` parses as a URL and, when `check` is True, answers with HTTP status 200.
+
+    Args:
+        url: Candidate URL; it is converted with str() before parsing.
+        check: If True, open the URL and require status code 200. If False, only the syntax is checked.
+
+    Returns:
+        bool: False when scheme or network location is missing, when the URL is malformed, or (with `check`)
+        when the URL cannot be opened or answers with a status other than 200.
+    """
+    import urllib.error
+    import urllib.parse
+    import urllib.request
+
+    try:
+        url = str(url)
+        result = urllib.parse.urlparse(url)
+        if not (result.scheme and result.netloc):  # explicit test: an assert would be stripped under python -O
+            return False
+        if not check:
+            return True
+        with urllib.request.urlopen(url) as response:  # close the connection once the status has been read
+            return response.getcode() == 200
+    except (urllib.error.URLError, ValueError):
+        # URLError includes HTTPError plus DNS/connection failures and unknown schemes; ValueError is raised
+        # for malformed URLs. All of them mean "not a usable URL" rather than a crash for the caller.
+        return False
+
+
+def gsutil_getsize(url=''):
+    """
+    Return the size in bytes that `gsutil du` reports for a gs://bucket/file URL, or 0 if it prints nothing.
+
+    See https://cloud.google.com/storage/docs/gsutil/commands/du
+
+    Raises:
+        FileNotFoundError: if the gsutil executable cannot be found.
+        subprocess.CalledProcessError: if gsutil exits with a non-zero status.
+    """
+    import shutil
+
+    # Run the argument list directly instead of with shell=True: on POSIX, shell=True with a list executes
+    # only the first item ('gsutil') and hands 'du' and the URL to the shell, so gsutil never sees them.
+    # shutil.which resolves the full path so wrapper scripts such as gsutil.cmd can be located on Windows.
+    executable = shutil.which('gsutil') or 'gsutil'
+    output = subprocess.check_output([executable, 'du', url], encoding='utf-8')
+    fields = output.split()  # first whitespace-separated field is the size
+    return int(fields[0]) if fields else 0
+
+
+def url_getsize(url='https://ultralytics.com/images/bus.jpg'):
+    """
+    Return the downloadable file size in bytes, read from the content-length header of a HEAD request.
+
+    Redirects are followed. -1 is returned when the response carries no content-length header.
+    """
+    headers = requests.head(url, allow_redirects=True).headers
+    size = headers.get('content-length', -1)
+    return int(size)
+
+
+def curl_download(url, filename, *, silent: bool = False) -> bool:
+    """
+    Download `url` to `filename` by running curl, retrying up to 9 times and resuming partial transfers.
+
+    Args:
+        url: Address to download from.
+        filename: Destination passed to curl's --output option.
+        silent: If True, the 'sS' (silent) letters are added to curl's short-option group.
+
+    Returns:
+        bool: True when the curl process exits with return code 0.
+    """
+    short_flags = '-sSL' if silent else '-L'  # 'sS' is prepended to 'L' only in silent mode
+    command = ['curl', '-#', short_flags, url, '--output', filename, '--retry', '9', '-C', '-']
+    completed = subprocess.run(command)
+    return completed.returncode == 0
+
+
+def safe_download(file, url, url2=None, min_bytes=1E0, error_msg=''):
+    """
+    Download `url` to `file`, re-attempting with curl if the first attempt fails or is too small.
+
+    Args:
+        file: Destination path (str or Path).
+        url: Primary URL, fetched with torch.hub.download_url_to_file.
+        url2: Optional fallback URL for the curl re-attempt; `url` is reused when it is None.
+        min_bytes: Minimum acceptable file size in bytes; smaller results are treated as incomplete.
+        error_msg: Extra text appended to the log entry when the download finally fails.
+
+    Returns:
+        None. A file that is still missing or smaller than `min_bytes` at the end is removed and the failure
+        is logged rather than raised.
+    """
+    from utils.general import LOGGER
+
+    file = Path(file)
+    assert_msg = f"Downloaded file '{file}' does not exist or size is < min_bytes={min_bytes}"
+
+    def incomplete():
+        # One predicate for both checks, so a file of exactly min_bytes is not re-downloaded and then accepted
+        return not file.exists() or file.stat().st_size < min_bytes
+
+    try:  # url1
+        LOGGER.info(f'Downloading {url} to {file}...')
+        torch.hub.download_url_to_file(url, str(file), progress=LOGGER.level <= logging.INFO)
+        if incomplete():  # explicit raise: an assert would be stripped under python -O and skip the retry
+            raise RuntimeError(assert_msg)
+    except Exception as e:  # url2
+        if file.exists():
+            file.unlink()  # remove partial downloads
+        LOGGER.info(f'ERROR: {e}\nRe-attempting {url2 or url} to {file}...')
+        # curl download, retry and resume on fail
+        curl_download(url2 or url, file)
+    finally:
+        if incomplete():  # check
+            if file.exists():
+                file.unlink()  # remove partial downloads
+            LOGGER.info(f'ERROR: {assert_msg}\n{error_msg}')
+        LOGGER.info('')
+
+
+def attempt_download(file, repo='ultralytics/yolov5', release='v7.0'):
+    """
+    Return a local path for `file`, downloading it first when it is not found locally.
+
+    If `file` is an http(s) URL it is downloaded to its decoded file name (query string dropped). Otherwise,
+    when its name matches a release asset of `repo`, it is downloaded from that GitHub release.
+
+    Args:
+        file: Local path or URL (str or Path); surrounding whitespace and single quotes are removed.
+        repo: GitHub repository in 'owner/name' form.
+        release: Release to query, i.e. 'latest', 'v7.0', etc.
+
+    Returns:
+        str: Path of the file as a string.
+    """
+    from utils.general import LOGGER
+
+    def github_assets(repository, version='latest'):
+        # Return GitHub repo tag (i.e. 'v7.0') and assets (i.e. ['yolov5s.pt', 'yolov5m.pt', ...])
+        endpoint = 'latest' if version == 'latest' else f'tags/{version}'  # i.e. tags/v7.0
+        info = requests.get(f'https://api.github.com/repos/{repository}/releases/{endpoint}').json()  # github api
+        return info['tag_name'], [asset['name'] for asset in info['assets']]  # tag, assets
+
+    file = Path(str(file).strip().replace("'", ''))
+    if file.exists():
+        return str(file)  # found locally, nothing to download
+
+    as_text = str(file)
+    name = Path(urllib.parse.unquote(as_text)).name  # decode '%2F' to '/' etc.
+
+    # URL specified
+    if as_text.startswith(('http:/', 'https:/')):
+        url = as_text.replace(':/', '://')  # Pathlib turns :// -> :/
+        file = name.split('?')[0]  # parse authentication https://url.com/file.txt?auth...
+        if not Path(file).is_file():
+            safe_download(file=file, url=url, min_bytes=1E5)
+        else:
+            LOGGER.info(f'Found {url} locally at {file}')  # file already exists
+        return file
+
+    # GitHub assets: default names are used when the API cannot be queried
+    assets = [f'yolov5{s}{variant}.pt' for s in 'nsmlx' for variant in ('', '6', '-cls', '-seg')]
+    try:
+        tag, assets = github_assets(repo, release)
+    except Exception:
+        try:
+            tag, assets = github_assets(repo)  # latest release
+        except Exception:
+            try:
+                git_tags = subprocess.check_output('git tag', shell=True, stderr=subprocess.STDOUT).decode()
+                tag = git_tags.split()[-1]
+            except Exception:
+                tag = release
+
+    if name in assets:
+        file.parent.mkdir(parents=True, exist_ok=True)  # make parent dir (if required)
+        download_url = f'https://github.com/{repo}/releases/download/{tag}/{name}'
+        hint = f'{file} missing, try downloading from https://github.com/{repo}/releases/{tag}'
+        safe_download(file, url=download_url, min_bytes=1E5, error_msg=hint)
+
+    return str(file)