Replace curl with requests

This commit is contained in:
Ines Montani 2020-06-28 16:25:53 +02:00
parent dbe86b3453
commit 569376e34e

View File

@ -9,6 +9,8 @@ import os
import re import re
import shutil import shutil
import sys import sys
import requests
import tqdm
from ._app import app, Arg, Opt, COMMAND, NAME from ._app import app, Arg, Opt, COMMAND, NAME
from .. import about from .. import about
@ -106,7 +108,7 @@ def project_assets_cli(
defined in the "assets" section of the project config. If possible, DVC defined in the "assets" section of the project config. If possible, DVC
will try to track the files so you can pull changes from upstream. It will will try to track the files so you can pull changes from upstream. It will
also try and store the checksum so the assets are versioned. If the file also try and store the checksum so the assets are versioned. If the file
can't be tracked or checked, it will be downloaded using curl. If a checksum can't be tracked or checked, it will be downloaded without DVC. If a checksum
is provided in the project config, the file is only downloaded if no local is provided in the project config, the file is only downloaded if no local
file with the same checksum exists. file with the same checksum exists.
""" """
@ -320,6 +322,7 @@ def fetch_asset(
if checksum == get_checksum(dest_path): if checksum == get_checksum(dest_path):
msg.good(f"Skipping download with matching checksum: {dest}") msg.good(f"Skipping download with matching checksum: {dest}")
return return
dvc_add_cmd = ["dvc", "add", str(dest_path), "--external"]
with working_dir(project_path): with working_dir(project_path):
try: try:
# If these fail, we don't want to output an error or info message. # If these fail, we don't want to output an error or info message.
@ -331,11 +334,13 @@ def fetch_asset(
except subprocess.CalledProcessError: except subprocess.CalledProcessError:
dvc_cmd = ["dvc", "get-url", url, str(dest_path)] dvc_cmd = ["dvc", "get-url", url, str(dest_path)]
print(subprocess.check_output(dvc_cmd, stderr=subprocess.DEVNULL)) print(subprocess.check_output(dvc_cmd, stderr=subprocess.DEVNULL))
run_command(["dvc", "add", str(dest_path)]) run_command(dvc_add_cmd)
except subprocess.CalledProcessError: except subprocess.CalledProcessError:
# TODO: replace curl try:
run_command(["curl", url, "--output", str(dest_path), "--progress-bar"]) download_file(url, dest_path)
run_command(["dvc", "add", str(dest_path)]) except requests.exceptions.HTTPError as e:
msg.fail(f"Download failed: {dest}", e)
run_command(dvc_add_cmd)
if checksum and checksum != get_checksum(dest_path): if checksum and checksum != get_checksum(dest_path):
msg.warn(f"Checksum doesn't match value defined in {CONFIG_FILE}: {dest}") msg.warn(f"Checksum doesn't match value defined in {CONFIG_FILE}: {dest}")
msg.good(f"Fetched asset {dest}") msg.good(f"Fetched asset {dest}")
@ -627,3 +632,26 @@ def check_clone(name: str, dest: Path, repo: str) -> None:
f"Can't clone project, parent directory doesn't exist: {dest.parent}", f"Can't clone project, parent directory doesn't exist: {dest.parent}",
exits=1, exits=1,
) )
def download_file(url: str, dest: Path, chunk_size: int = 1024) -> None:
    """Download a file using requests, showing a progress bar while writing.

    url (str): The URL of the file.
    dest (Path): The destination path.
    chunk_size (int): The size of chunks to read/write.
    RAISES (requests.exceptions.HTTPError): If the server responds with an
        error status code.
    """
    # Use the response as a context manager so the streamed connection is
    # released even if the status check or the write to disk fails.
    with requests.get(url, stream=True) as response:
        response.raise_for_status()
        # The server may omit Content-Length; pass None so the progress bar
        # treats the total as unknown instead of zero.
        total = int(response.headers.get("content-length", 0)) or None
        progress_settings = {
            "total": total,
            "unit": "iB",
            "unit_scale": True,
            # Binary prefixes (KiB, MiB, ...) are powers of 1024 regardless
            # of the chunk size used for reading.
            "unit_divisor": 1024,
            "leave": False,
        }
        with dest.open("wb") as f, tqdm.tqdm(**progress_settings) as bar:
            for data in response.iter_content(chunk_size=chunk_size):
                # Advance the bar by the number of bytes actually written.
                size = f.write(data)
                bar.update(size)