mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 18:26:30 +03:00
Merge pull request #6064 from explosion/fix/sparse-checkout-ux
Fix sparse checkout and error handling
This commit is contained in:
commit
9cc304c194
|
@ -300,7 +300,9 @@ def ensure_pathy(path):
|
||||||
return Pathy(path)
|
return Pathy(path)
|
||||||
|
|
||||||
|
|
||||||
def git_sparse_checkout(repo: str, subpath: str, dest: Path, *, branch: str = "master"):
|
def git_checkout(
|
||||||
|
repo: str, subpath: str, dest: Path, *, branch: str = "master", sparse: bool = False
|
||||||
|
):
|
||||||
git_version = get_git_version()
|
git_version = get_git_version()
|
||||||
if dest.exists():
|
if dest.exists():
|
||||||
msg.fail("Destination of checkout must not exist", exits=1)
|
msg.fail("Destination of checkout must not exist", exits=1)
|
||||||
|
@ -323,11 +325,14 @@ def git_sparse_checkout(repo: str, subpath: str, dest: Path, *, branch: str = "m
|
||||||
# We're using Git and sparse checkout to only clone the files we need
|
# We're using Git and sparse checkout to only clone the files we need
|
||||||
with make_tempdir() as tmp_dir:
|
with make_tempdir() as tmp_dir:
|
||||||
supports_sparse = git_version >= (2, 22)
|
supports_sparse = git_version >= (2, 22)
|
||||||
|
use_sparse = supports_sparse and sparse
|
||||||
# This is the "clone, but don't download anything" part.
|
# This is the "clone, but don't download anything" part.
|
||||||
cmd = f"git clone {repo} {tmp_dir} --no-checkout --depth 1 " f"-b {branch} "
|
cmd = f"git clone {repo} {tmp_dir} --no-checkout --depth 1 " f"-b {branch} "
|
||||||
if supports_sparse:
|
if use_sparse:
|
||||||
cmd += f"--filter=blob:none" # <-- The key bit
|
cmd += f"--filter=blob:none" # <-- The key bit
|
||||||
else:
|
# Only show warnings if the user explicitly wants sparse checkout but
|
||||||
|
# the Git version doesn't support it
|
||||||
|
elif sparse:
|
||||||
err_old = (
|
err_old = (
|
||||||
f"You're running an old version of Git (v{git_version[0]}.{git_version[1]}) "
|
f"You're running an old version of Git (v{git_version[0]}.{git_version[1]}) "
|
||||||
f"that doesn't fully support sparse checkout yet."
|
f"that doesn't fully support sparse checkout yet."
|
||||||
|
@ -342,19 +347,19 @@ def git_sparse_checkout(repo: str, subpath: str, dest: Path, *, branch: str = "m
|
||||||
try_run_command(cmd)
|
try_run_command(cmd)
|
||||||
# Now we need to find the missing filenames for the subpath we want.
|
# Now we need to find the missing filenames for the subpath we want.
|
||||||
# Looking for this 'rev-list' command in the git --help? Hah.
|
# Looking for this 'rev-list' command in the git --help? Hah.
|
||||||
cmd = f"git -C {tmp_dir} rev-list --objects --all {'--missing=print ' if supports_sparse else ''} -- {subpath}"
|
cmd = f"git -C {tmp_dir} rev-list --objects --all {'--missing=print ' if use_sparse else ''} -- {subpath}"
|
||||||
ret = try_run_command(cmd)
|
ret = try_run_command(cmd)
|
||||||
git_repo = _from_http_to_git(repo)
|
git_repo = _from_http_to_git(repo)
|
||||||
# Now pass those missings into another bit of git internals
|
# Now pass those missings into another bit of git internals
|
||||||
missings = " ".join([x[1:] for x in ret.stdout.split() if x.startswith("?")])
|
missings = " ".join([x[1:] for x in ret.stdout.split() if x.startswith("?")])
|
||||||
if supports_sparse and not missings:
|
if use_sparse and not missings:
|
||||||
err = (
|
err = (
|
||||||
f"Could not find any relevant files for '{subpath}'. "
|
f"Could not find any relevant files for '{subpath}'. "
|
||||||
f"Did you specify a correct and complete path within repo '{repo}' "
|
f"Did you specify a correct and complete path within repo '{repo}' "
|
||||||
f"and branch {branch}?"
|
f"and branch {branch}?"
|
||||||
)
|
)
|
||||||
msg.fail(err, exits=1)
|
msg.fail(err, exits=1)
|
||||||
if supports_sparse:
|
if use_sparse:
|
||||||
cmd = f"git -C {tmp_dir} fetch-pack {git_repo} {missings}"
|
cmd = f"git -C {tmp_dir} fetch-pack {git_repo} {missings}"
|
||||||
try_run_command(cmd)
|
try_run_command(cmd)
|
||||||
# And finally, we can checkout our subpath
|
# And finally, we can checkout our subpath
|
||||||
|
|
|
@ -6,14 +6,15 @@ import shutil
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
from ...util import ensure_path, working_dir
|
from ...util import ensure_path, working_dir
|
||||||
from .._util import project_cli, Arg, PROJECT_FILE, load_project_config, get_checksum
|
from .._util import project_cli, Arg, Opt, PROJECT_FILE, load_project_config
|
||||||
from .._util import download_file, git_sparse_checkout, get_git_version
|
from .._util import get_checksum, download_file, git_checkout, get_git_version
|
||||||
|
|
||||||
|
|
||||||
@project_cli.command("assets")
|
@project_cli.command("assets")
|
||||||
def project_assets_cli(
|
def project_assets_cli(
|
||||||
# fmt: off
|
# fmt: off
|
||||||
project_dir: Path = Arg(Path.cwd(), help="Path to cloned project. Defaults to current working directory.", exists=True, file_okay=False),
|
project_dir: Path = Arg(Path.cwd(), help="Path to cloned project. Defaults to current working directory.", exists=True, file_okay=False),
|
||||||
|
sparse_checkout: bool = Opt(False, "--sparse", "-S", help="Use sparse checkout for assets provided via Git, to only check out and clone the files needed. Requires Git v2.22+.")
|
||||||
# fmt: on
|
# fmt: on
|
||||||
):
|
):
|
||||||
"""Fetch project assets like datasets and pretrained weights. Assets are
|
"""Fetch project assets like datasets and pretrained weights. Assets are
|
||||||
|
@ -23,10 +24,10 @@ def project_assets_cli(
|
||||||
|
|
||||||
DOCS: https://nightly.spacy.io/api/cli#project-assets
|
DOCS: https://nightly.spacy.io/api/cli#project-assets
|
||||||
"""
|
"""
|
||||||
project_assets(project_dir)
|
project_assets(project_dir, sparse_checkout=sparse_checkout)
|
||||||
|
|
||||||
|
|
||||||
def project_assets(project_dir: Path) -> None:
|
def project_assets(project_dir: Path, *, sparse_checkout: bool = False) -> None:
|
||||||
"""Fetch assets for a project using DVC if possible.
|
"""Fetch assets for a project using DVC if possible.
|
||||||
|
|
||||||
project_dir (Path): Path to project directory.
|
project_dir (Path): Path to project directory.
|
||||||
|
@ -58,11 +59,12 @@ def project_assets(project_dir: Path) -> None:
|
||||||
shutil.rmtree(dest)
|
shutil.rmtree(dest)
|
||||||
else:
|
else:
|
||||||
dest.unlink()
|
dest.unlink()
|
||||||
git_sparse_checkout(
|
git_checkout(
|
||||||
asset["git"]["repo"],
|
asset["git"]["repo"],
|
||||||
asset["git"]["path"],
|
asset["git"]["path"],
|
||||||
dest,
|
dest,
|
||||||
branch=asset["git"].get("branch"),
|
branch=asset["git"].get("branch"),
|
||||||
|
sparse=sparse_checkout,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
url = asset.get("url")
|
url = asset.get("url")
|
||||||
|
|
|
@ -7,7 +7,7 @@ import re
|
||||||
from ... import about
|
from ... import about
|
||||||
from ...util import ensure_path
|
from ...util import ensure_path
|
||||||
from .._util import project_cli, Arg, Opt, COMMAND, PROJECT_FILE
|
from .._util import project_cli, Arg, Opt, COMMAND, PROJECT_FILE
|
||||||
from .._util import git_sparse_checkout, get_git_version
|
from .._util import git_checkout, get_git_version
|
||||||
|
|
||||||
|
|
||||||
@project_cli.command("clone")
|
@project_cli.command("clone")
|
||||||
|
@ -16,7 +16,8 @@ def project_clone_cli(
|
||||||
name: str = Arg(..., help="The name of the template to clone"),
|
name: str = Arg(..., help="The name of the template to clone"),
|
||||||
dest: Optional[Path] = Arg(None, help="Where to clone the project. Defaults to current working directory", exists=False),
|
dest: Optional[Path] = Arg(None, help="Where to clone the project. Defaults to current working directory", exists=False),
|
||||||
repo: str = Opt(about.__projects__, "--repo", "-r", help="The repository to clone from"),
|
repo: str = Opt(about.__projects__, "--repo", "-r", help="The repository to clone from"),
|
||||||
branch: str = Opt(about.__projects_branch__, "--branch", "-b", help="The branch to clone from")
|
branch: str = Opt(about.__projects_branch__, "--branch", "-b", help="The branch to clone from"),
|
||||||
|
sparse_checkout: bool = Opt(False, "--sparse", "-S", help="Use sparse Git checkout to only check out and clone the files needed. Requires Git v2.22+.")
|
||||||
# fmt: on
|
# fmt: on
|
||||||
):
|
):
|
||||||
"""Clone a project template from a repository. Calls into "git" and will
|
"""Clone a project template from a repository. Calls into "git" and will
|
||||||
|
@ -28,7 +29,7 @@ def project_clone_cli(
|
||||||
"""
|
"""
|
||||||
if dest is None:
|
if dest is None:
|
||||||
dest = Path.cwd() / Path(name).parts[-1]
|
dest = Path.cwd() / Path(name).parts[-1]
|
||||||
project_clone(name, dest, repo=repo, branch=branch)
|
project_clone(name, dest, repo=repo, branch=branch, sparse_checkout=sparse_checkout)
|
||||||
|
|
||||||
|
|
||||||
def project_clone(
|
def project_clone(
|
||||||
|
@ -37,6 +38,7 @@ def project_clone(
|
||||||
*,
|
*,
|
||||||
repo: str = about.__projects__,
|
repo: str = about.__projects__,
|
||||||
branch: str = about.__projects_branch__,
|
branch: str = about.__projects_branch__,
|
||||||
|
sparse_checkout: bool = False,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Clone a project template from a repository.
|
"""Clone a project template from a repository.
|
||||||
|
|
||||||
|
@ -50,7 +52,7 @@ def project_clone(
|
||||||
project_dir = dest.resolve()
|
project_dir = dest.resolve()
|
||||||
repo_name = re.sub(r"(http(s?)):\/\/github.com/", "", repo)
|
repo_name = re.sub(r"(http(s?)):\/\/github.com/", "", repo)
|
||||||
try:
|
try:
|
||||||
git_sparse_checkout(repo, name, dest, branch=branch)
|
git_checkout(repo, name, dest, branch=branch, sparse=sparse_checkout)
|
||||||
except subprocess.CalledProcessError:
|
except subprocess.CalledProcessError:
|
||||||
err = f"Could not clone '{name}' from repo '{repo_name}'"
|
err = f"Could not clone '{name}' from repo '{repo_name}'"
|
||||||
msg.fail(err, exits=1)
|
msg.fail(err, exits=1)
|
||||||
|
|
|
@ -680,7 +680,10 @@ def run_command(
|
||||||
Errors.E970.format(str_command=" ".join(command), tool=command[0])
|
Errors.E970.format(str_command=" ".join(command), tool=command[0])
|
||||||
) from None
|
) from None
|
||||||
except subprocess.CalledProcessError as e:
|
except subprocess.CalledProcessError as e:
|
||||||
# We don't want a duplicate traceback here
|
# We don't want a duplicate traceback here so we're making sure the
|
||||||
|
# CalledProcessError isn't re-raised. We also print both the string
|
||||||
|
# message and the stderr, in case the error only has one of them.
|
||||||
|
print(e.stderr)
|
||||||
print(e)
|
print(e)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
if ret.returncode != 0:
|
if ret.returncode != 0:
|
||||||
|
|
Loading…
Reference in New Issue
Block a user