diff --git a/spacy/cli/_util.py b/spacy/cli/_util.py
index b03f3eb69..4694fddbb 100644
--- a/spacy/cli/_util.py
+++ b/spacy/cli/_util.py
@@ -1,4 +1,4 @@
-from typing import Dict, Any, Union, List, Optional, TYPE_CHECKING
+from typing import Dict, Any, Union, List, Optional, Tuple, TYPE_CHECKING
 import sys
 import shutil
 from pathlib import Path
@@ -321,41 +321,62 @@ def git_sparse_checkout(repo: str, subpath: str, dest: Path, *, branch: str = "m
     # *that* we can do by path.
     # We're using Git and sparse checkout to only clone the files we need
     with make_tempdir() as tmp_dir:
+        git_version = get_git_version()
+        supports_sparse = git_version >= (2, 22)
         # This is the "clone, but don't download anything" part.
-        cmd = (
-            f"git clone {repo} {tmp_dir} --no-checkout --depth 1 "
-            f"--filter=blob:none "  # <-- The key bit
-            f"-b {branch}"
-        )
+        cmd = f"git clone {repo} {tmp_dir} --no-checkout --depth 1 " f"-b {branch} "
+        if supports_sparse:
+            cmd += f"--filter=blob:none"  # <-- The key bit
+        else:
+            msg.warn(
+                f"You're running an old version of Git (v{git_version[0]}.{git_version[1]}) "
+                f"that doesn't fully support sparse checkout yet. This means that "
+                f"more files than necessary may be cloned. To only download the "
+                f"files needed, upgrade to Git v2.22 or above."
+            )
         _attempt_run_command(cmd)
         # Now we need to find the missing filenames for the subpath we want.
         # Looking for this 'rev-list' command in the git --help? Hah.
-        cmd = f"git -C {tmp_dir} rev-list --objects --all --missing=print -- {subpath}"
+        cmd = f"git -C {tmp_dir} rev-list --objects --all {'--missing=print ' if supports_sparse else ''} -- {subpath}"
         ret = _attempt_run_command(cmd)
         git_repo = _from_http_to_git(repo)
         # Now pass those missings into another bit of git internals
         missings = " ".join([x[1:] for x in ret.stdout.split() if x.startswith("?")])
-        if not missings:
-            err = f"Could not find any relevant files for '{subpath}'. " \
-                  f"Did you specify a correct and complete path within repo '{repo}' " \
-                  f"and branch {branch}?"
-            msg.fail(err, exits=1)
-        cmd = f"git -C {tmp_dir} fetch-pack {git_repo} {missings}"
-        _attempt_run_command(cmd)
+        if supports_sparse and not missings:
+            err = (
+                f"Could not find any relevant files for '{subpath}'. "
+                f"Did you specify a correct and complete path within repo '{repo}' "
+                f"and branch {branch}?"
+            )
+            msg.fail(err, exits=1)
+        if supports_sparse:
+            cmd = f"git -C {tmp_dir} fetch-pack {git_repo} {missings}"
+            _attempt_run_command(cmd)
         # And finally, we can checkout our subpath
         cmd = f"git -C {tmp_dir} checkout {branch} {subpath}"
         _attempt_run_command(cmd)
         # We need Path(name) to make sure we also support subdirectories
         shutil.move(str(tmp_dir / Path(subpath)), str(dest))
 
-def _attempt_run_command(cmd):
+
+def get_git_version() -> Tuple[int, int]:
+    ret = _attempt_run_command(["git", "--version"])
+    # TODO: this seems kinda brittle?
+    version = ret.stdout[11:].strip().split(".")
+    return (int(version[0]), int(version[1]))
+
+
+def _attempt_run_command(cmd: Union[str, List[str]]):
     try:
         return run_command(cmd, capture=True)
     except subprocess.CalledProcessError as e:
-        err = f"Could not run command: {cmd}."
-        msg.fail(err, exits=1)
+        err = f"Could not run command"
+        msg.fail(err)
+        print(cmd)
+        sys.exit(1)
 
-def _from_http_to_git(repo):
+
+def _from_http_to_git(repo: str) -> str:
     if repo.startswith("http://"):
         repo = repo.replace(r"http://", r"https://")
     if repo.startswith(r"https://"):
diff --git a/spacy/cli/project/clone.py b/spacy/cli/project/clone.py
index 427df490f..ab617e4ba 100644
--- a/spacy/cli/project/clone.py
+++ b/spacy/cli/project/clone.py
@@ -27,7 +27,7 @@ def project_clone_cli(
     DOCS: https://nightly.spacy.io/api/cli#project-clone
     """
     if dest is None:
-        dest = Path.cwd() / name
+        dest = Path.cwd() / Path(name).parts[-1]
     project_clone(name, dest, repo=repo, branch=branch)
 
 
diff --git a/spacy/util.py b/spacy/util.py
index bd567ddc7..d8df04554 100644
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -648,7 +648,7 @@ def join_command(command: List[str]) -> str:
     return " ".join(shlex.quote(cmd) for cmd in command)
 
 
-def run_command(command: Union[str, List[str]], *, capture=False, stdin=None) -> None:
+def run_command(command: Union[str, List[str]], *, capture=False, stdin=None):
     """Run a command on the command line as a subprocess. If the subprocess
     returns a non-zero exit code, a system exit is performed.
 