mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 17:24:41 +03:00
Fix sparse checkout and error handling
This commit is contained in:
parent
b854e0bef9
commit
c052017025
|
@ -300,7 +300,9 @@ def ensure_pathy(path):
|
|||
return Pathy(path)
|
||||
|
||||
|
||||
def git_sparse_checkout(repo: str, subpath: str, dest: Path, *, branch: str = "master"):
|
||||
def git_checkout(
|
||||
repo: str, subpath: str, dest: Path, *, branch: str = "master", sparse: bool = False
|
||||
):
|
||||
git_version = get_git_version()
|
||||
if dest.exists():
|
||||
msg.fail("Destination of checkout must not exist", exits=1)
|
||||
|
@ -323,11 +325,14 @@ def git_sparse_checkout(repo: str, subpath: str, dest: Path, *, branch: str = "m
|
|||
# We're using Git and sparse checkout to only clone the files we need
|
||||
with make_tempdir() as tmp_dir:
|
||||
supports_sparse = git_version >= (2, 22)
|
||||
use_sparse = supports_sparse and sparse
|
||||
# This is the "clone, but don't download anything" part.
|
||||
cmd = f"git clone {repo} {tmp_dir} --no-checkout --depth 1 " f"-b {branch} "
|
||||
if supports_sparse:
|
||||
if use_sparse:
|
||||
cmd += f"--filter=blob:none" # <-- The key bit
|
||||
else:
|
||||
# Only show warnings if the user explicitly wants sparse checkout but
|
||||
# the Git version doesn't support it
|
||||
elif sparse:
|
||||
err_old = (
|
||||
f"You're running an old version of Git (v{git_version[0]}.{git_version[1]}) "
|
||||
f"that doesn't fully support sparse checkout yet."
|
||||
|
@ -342,19 +347,19 @@ def git_sparse_checkout(repo: str, subpath: str, dest: Path, *, branch: str = "m
|
|||
try_run_command(cmd)
|
||||
# Now we need to find the missing filenames for the subpath we want.
|
||||
# Looking for this 'rev-list' command in the git --help? Hah.
|
||||
cmd = f"git -C {tmp_dir} rev-list --objects --all {'--missing=print ' if supports_sparse else ''} -- {subpath}"
|
||||
cmd = f"git -C {tmp_dir} rev-list --objects --all {'--missing=print ' if use_sparse else ''} -- {subpath}"
|
||||
ret = try_run_command(cmd)
|
||||
git_repo = _from_http_to_git(repo)
|
||||
# Now pass those missings into another bit of git internals
|
||||
missings = " ".join([x[1:] for x in ret.stdout.split() if x.startswith("?")])
|
||||
if supports_sparse and not missings:
|
||||
if use_sparse and not missings:
|
||||
err = (
|
||||
f"Could not find any relevant files for '{subpath}'. "
|
||||
f"Did you specify a correct and complete path within repo '{repo}' "
|
||||
f"and branch {branch}?"
|
||||
)
|
||||
msg.fail(err, exits=1)
|
||||
if supports_sparse:
|
||||
if use_sparse:
|
||||
cmd = f"git -C {tmp_dir} fetch-pack {git_repo} {missings}"
|
||||
try_run_command(cmd)
|
||||
# And finally, we can checkout our subpath
|
||||
|
|
|
@ -6,14 +6,15 @@ import shutil
|
|||
import requests
|
||||
|
||||
from ...util import ensure_path, working_dir
|
||||
from .._util import project_cli, Arg, PROJECT_FILE, load_project_config, get_checksum
|
||||
from .._util import download_file, git_sparse_checkout, get_git_version
|
||||
from .._util import project_cli, Arg, Opt, PROJECT_FILE, load_project_config
|
||||
from .._util import get_checksum, download_file, git_checkout, get_git_version
|
||||
|
||||
|
||||
@project_cli.command("assets")
|
||||
def project_assets_cli(
|
||||
# fmt: off
|
||||
project_dir: Path = Arg(Path.cwd(), help="Path to cloned project. Defaults to current working directory.", exists=True, file_okay=False),
|
||||
sparse_checkout: bool = Opt(False, "--sparse", "-S", help="Use sparse checkout for assets provided via Git, to only check out and clone the files needed. Requires Git v2.22+.")
|
||||
# fmt: on
|
||||
):
|
||||
"""Fetch project assets like datasets and pretrained weights. Assets are
|
||||
|
@ -23,10 +24,10 @@ def project_assets_cli(
|
|||
|
||||
DOCS: https://nightly.spacy.io/api/cli#project-assets
|
||||
"""
|
||||
project_assets(project_dir)
|
||||
project_assets(project_dir, sparse_checkout=sparse_checkout)
|
||||
|
||||
|
||||
def project_assets(project_dir: Path) -> None:
|
||||
def project_assets(project_dir: Path, *, sparse_checkout: bool = False) -> None:
|
||||
"""Fetch assets for a project using DVC if possible.
|
||||
|
||||
project_dir (Path): Path to project directory.
|
||||
|
@ -58,11 +59,12 @@ def project_assets(project_dir: Path) -> None:
|
|||
shutil.rmtree(dest)
|
||||
else:
|
||||
dest.unlink()
|
||||
git_sparse_checkout(
|
||||
git_checkout(
|
||||
asset["git"]["repo"],
|
||||
asset["git"]["path"],
|
||||
dest,
|
||||
branch=asset["git"].get("branch"),
|
||||
sparse=sparse_checkout,
|
||||
)
|
||||
else:
|
||||
url = asset.get("url")
|
||||
|
|
|
@ -7,7 +7,7 @@ import re
|
|||
from ... import about
|
||||
from ...util import ensure_path
|
||||
from .._util import project_cli, Arg, Opt, COMMAND, PROJECT_FILE
|
||||
from .._util import git_sparse_checkout, get_git_version
|
||||
from .._util import git_checkout, get_git_version
|
||||
|
||||
|
||||
@project_cli.command("clone")
|
||||
|
@ -16,7 +16,8 @@ def project_clone_cli(
|
|||
name: str = Arg(..., help="The name of the template to clone"),
|
||||
dest: Optional[Path] = Arg(None, help="Where to clone the project. Defaults to current working directory", exists=False),
|
||||
repo: str = Opt(about.__projects__, "--repo", "-r", help="The repository to clone from"),
|
||||
branch: str = Opt(about.__projects_branch__, "--branch", "-b", help="The branch to clone from")
|
||||
branch: str = Opt(about.__projects_branch__, "--branch", "-b", help="The branch to clone from"),
|
||||
sparse_checkout: bool = Opt(False, "--sparse", "-S", help="Use sparse Git checkout to only check out and clone the files needed. Requires Git v2.22+.")
|
||||
# fmt: on
|
||||
):
|
||||
"""Clone a project template from a repository. Calls into "git" and will
|
||||
|
@ -28,7 +29,7 @@ def project_clone_cli(
|
|||
"""
|
||||
if dest is None:
|
||||
dest = Path.cwd() / Path(name).parts[-1]
|
||||
project_clone(name, dest, repo=repo, branch=branch)
|
||||
project_clone(name, dest, repo=repo, branch=branch, sparse_checkout=sparse_checkout)
|
||||
|
||||
|
||||
def project_clone(
|
||||
|
@ -37,6 +38,7 @@ def project_clone(
|
|||
*,
|
||||
repo: str = about.__projects__,
|
||||
branch: str = about.__projects_branch__,
|
||||
sparse_checkout: bool = False,
|
||||
) -> None:
|
||||
"""Clone a project template from a repository.
|
||||
|
||||
|
@ -50,7 +52,7 @@ def project_clone(
|
|||
project_dir = dest.resolve()
|
||||
repo_name = re.sub(r"(http(s?)):\/\/github.com/", "", repo)
|
||||
try:
|
||||
git_sparse_checkout(repo, name, dest, branch=branch)
|
||||
git_checkout(repo, name, dest, branch=branch, sparse=sparse_checkout)
|
||||
except subprocess.CalledProcessError:
|
||||
err = f"Could not clone '{name}' from repo '{repo_name}'"
|
||||
msg.fail(err, exits=1)
|
||||
|
|
|
@ -680,7 +680,10 @@ def run_command(
|
|||
Errors.E970.format(str_command=" ".join(command), tool=command[0])
|
||||
) from None
|
||||
except subprocess.CalledProcessError as e:
|
||||
# We don't want a duplicate traceback here
|
||||
# We don't want a duplicate traceback here so we're making sure the
|
||||
# CalledProcessError isn't re-raised. We also print both the string
|
||||
# message and the stderr, in case the error only has one of them.
|
||||
print(e.stderr)
|
||||
print(e)
|
||||
sys.exit(1)
|
||||
if ret.returncode != 0:
|
||||
|
|
Loading…
Reference in New Issue
Block a user