mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 09:56:28 +03:00
Replace curl with requests
This commit is contained in:
parent
dbe86b3453
commit
569376e34e
|
@ -9,6 +9,8 @@ import os
|
||||||
import re
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
import sys
|
import sys
|
||||||
|
import requests
|
||||||
|
import tqdm
|
||||||
|
|
||||||
from ._app import app, Arg, Opt, COMMAND, NAME
|
from ._app import app, Arg, Opt, COMMAND, NAME
|
||||||
from .. import about
|
from .. import about
|
||||||
|
@ -106,7 +108,7 @@ def project_assets_cli(
|
||||||
defined in the "assets" section of the project config. If possible, DVC
|
defined in the "assets" section of the project config. If possible, DVC
|
||||||
will try to track the files so you can pull changes from upstream. It will
|
will try to track the files so you can pull changes from upstream. It will
|
||||||
also try and store the checksum so the assets are versioned. If the file
|
also try and store the checksum so the assets are versioned. If the file
|
||||||
can't be tracked or checked, it will be downloaded using curl. If a checksum
|
can't be tracked or checked, it will be downloaded without DVC. If a checksum
|
||||||
is provided in the project config, the file is only downloaded if no local
|
is provided in the project config, the file is only downloaded if no local
|
||||||
file with the same checksum exists.
|
file with the same checksum exists.
|
||||||
"""
|
"""
|
||||||
|
@ -320,6 +322,7 @@ def fetch_asset(
|
||||||
if checksum == get_checksum(dest_path):
|
if checksum == get_checksum(dest_path):
|
||||||
msg.good(f"Skipping download with matching checksum: {dest}")
|
msg.good(f"Skipping download with matching checksum: {dest}")
|
||||||
return
|
return
|
||||||
|
dvc_add_cmd = ["dvc", "add", str(dest_path), "--external"]
|
||||||
with working_dir(project_path):
|
with working_dir(project_path):
|
||||||
try:
|
try:
|
||||||
# If these fail, we don't want to output an error or info message.
|
# If these fail, we don't want to output an error or info message.
|
||||||
|
@ -331,11 +334,13 @@ def fetch_asset(
|
||||||
except subprocess.CalledProcessError:
|
except subprocess.CalledProcessError:
|
||||||
dvc_cmd = ["dvc", "get-url", url, str(dest_path)]
|
dvc_cmd = ["dvc", "get-url", url, str(dest_path)]
|
||||||
print(subprocess.check_output(dvc_cmd, stderr=subprocess.DEVNULL))
|
print(subprocess.check_output(dvc_cmd, stderr=subprocess.DEVNULL))
|
||||||
run_command(["dvc", "add", str(dest_path)])
|
run_command(dvc_add_cmd)
|
||||||
except subprocess.CalledProcessError:
|
except subprocess.CalledProcessError:
|
||||||
# TODO: replace curl
|
try:
|
||||||
run_command(["curl", url, "--output", str(dest_path), "--progress-bar"])
|
download_file(url, dest_path)
|
||||||
run_command(["dvc", "add", str(dest_path)])
|
except requests.exceptions.HTTPError as e:
|
||||||
|
msg.fail(f"Download failed: {dest}", e)
|
||||||
|
run_command(dvc_add_cmd)
|
||||||
if checksum and checksum != get_checksum(dest_path):
|
if checksum and checksum != get_checksum(dest_path):
|
||||||
msg.warn(f"Checksum doesn't match value defined in {CONFIG_FILE}: {dest}")
|
msg.warn(f"Checksum doesn't match value defined in {CONFIG_FILE}: {dest}")
|
||||||
msg.good(f"Fetched asset {dest}")
|
msg.good(f"Fetched asset {dest}")
|
||||||
|
@ -627,3 +632,26 @@ def check_clone(name: str, dest: Path, repo: str) -> None:
|
||||||
f"Can't clone project, parent directory doesn't exist: {dest.parent}",
|
f"Can't clone project, parent directory doesn't exist: {dest.parent}",
|
||||||
exits=1,
|
exits=1,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def download_file(url: str, dest: Path, chunk_size: int = 1024) -> None:
    """Download a file using requests, streaming it to disk in chunks while
    showing a tqdm progress bar. Raises requests.exceptions.HTTPError (via
    raise_for_status) if the server responds with an error status code.

    url (str): The URL of the file.
    dest (Path): The destination path.
    chunk_size (int): The size of chunks to read/write.
    """
    response = requests.get(url, stream=True)
    try:
        response.raise_for_status()
        # Content-Length is optional; fall back to 0 when the server doesn't
        # send it, as the progress bar total is informational only.
        total = int(response.headers.get("content-length", 0))
        progress_settings = {
            "total": total,
            "unit": "iB",
            "unit_scale": True,
            # Binary prefixes (KiB, MiB, ...) are always multiples of 1024,
            # regardless of the chunk size used for reading.
            "unit_divisor": 1024,
            "leave": False,
        }
        with dest.open("wb") as f, tqdm.tqdm(**progress_settings) as bar:
            for data in response.iter_content(chunk_size=chunk_size):
                size = f.write(data)
                bar.update(size)
    finally:
        # With stream=True the connection stays open until the body is fully
        # consumed or the response is closed, so always close it explicitly.
        response.close()
|
||||||
|
|
Loading…
Reference in New Issue
Block a user