mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	Merge pull request #6048 from explosion/fix/clone-compat
This commit is contained in:
		
						commit
						f886f5bbc8
					
				| 
						 | 
				
			
			@ -1,4 +1,4 @@
 | 
			
		|||
from typing import Dict, Any, Union, List, Optional, TYPE_CHECKING
 | 
			
		||||
from typing import Dict, Any, Union, List, Optional, Tuple, TYPE_CHECKING
 | 
			
		||||
import sys
 | 
			
		||||
import shutil
 | 
			
		||||
from pathlib import Path
 | 
			
		||||
| 
						 | 
				
			
			@ -321,25 +321,35 @@ def git_sparse_checkout(repo: str, subpath: str, dest: Path, *, branch: str = "m
 | 
			
		|||
    # *that* we can do by path.
 | 
			
		||||
    # We're using Git and sparse checkout to only clone the files we need
 | 
			
		||||
    with make_tempdir() as tmp_dir:
 | 
			
		||||
        git_version = get_git_version()
 | 
			
		||||
        supports_sparse = git_version >= (2, 22)
 | 
			
		||||
        # This is the "clone, but don't download anything" part.
 | 
			
		||||
        cmd = (
 | 
			
		||||
            f"git clone {repo} {tmp_dir} --no-checkout --depth 1 "
 | 
			
		||||
            f"--filter=blob:none "  # <-- The key bit
 | 
			
		||||
            f"-b {branch}"
 | 
			
		||||
        cmd = f"git clone {repo} {tmp_dir} --no-checkout --depth 1 " f"-b {branch} "
 | 
			
		||||
        if supports_sparse:
 | 
			
		||||
            cmd += f"--filter=blob:none"  # <-- The key bit
 | 
			
		||||
        else:
 | 
			
		||||
            msg.warn(
 | 
			
		||||
                f"You're running an old version of Git (v{git_version[0]}.{git_version[1]}) "
 | 
			
		||||
                f"that doesn't fully support sparse checkout yet. This means that "
 | 
			
		||||
                f"more files than necessary may be downloaded temporarily. To "
 | 
			
		||||
                f"only download the files needed, upgrade to Git v2.22 or above."
 | 
			
		||||
            )
 | 
			
		||||
        _attempt_run_command(cmd)
 | 
			
		||||
        # Now we need to find the missing filenames for the subpath we want.
 | 
			
		||||
        # Looking for this 'rev-list' command in the git --help? Hah.
 | 
			
		||||
        cmd = f"git -C {tmp_dir} rev-list --objects --all --missing=print -- {subpath}"
 | 
			
		||||
        cmd = f"git -C {tmp_dir} rev-list --objects --all {'--missing=print ' if supports_sparse else ''} -- {subpath}"
 | 
			
		||||
        ret = _attempt_run_command(cmd)
 | 
			
		||||
        git_repo = _from_http_to_git(repo)
 | 
			
		||||
        # Now pass those missings into another bit of git internals
 | 
			
		||||
        missings = " ".join([x[1:] for x in ret.stdout.split() if x.startswith("?")])
 | 
			
		||||
        if not missings:
 | 
			
		||||
           err = f"Could not find any relevant files for '{subpath}'. " \
 | 
			
		||||
                 f"Did you specify a correct and complete path within repo '{repo}' " \
 | 
			
		||||
        if supports_sparse and not missings:
 | 
			
		||||
            err = (
 | 
			
		||||
                f"Could not find any relevant files for '{subpath}'. "
 | 
			
		||||
                f"Did you specify a correct and complete path within repo '{repo}' "
 | 
			
		||||
                f"and branch {branch}?"
 | 
			
		||||
            )
 | 
			
		||||
            msg.fail(err, exits=1)
 | 
			
		||||
        if supports_sparse:
 | 
			
		||||
            cmd = f"git -C {tmp_dir} fetch-pack {git_repo} {missings}"
 | 
			
		||||
            _attempt_run_command(cmd)
 | 
			
		||||
        # And finally, we can checkout our subpath
 | 
			
		||||
| 
						 | 
				
			
			@ -348,14 +358,25 @@ def git_sparse_checkout(repo: str, subpath: str, dest: Path, *, branch: str = "m
 | 
			
		|||
        # We need Path(name) to make sure we also support subdirectories
 | 
			
		||||
        shutil.move(str(tmp_dir / Path(subpath)), str(dest))
 | 
			
		||||
 | 
			
		||||
def _attempt_run_command(cmd):
 | 
			
		||||
 | 
			
		||||
def get_git_version() -> Tuple[int, int]:
    """Get the version of the installed Git as a (major, minor) tuple.

    Runs `git --version` via `_attempt_run_command` and extracts the first
    "<major>.<minor>" pair found in the captured output. The output is
    searched with a regular expression instead of being sliced at a fixed
    offset, so the result does not depend on the exact length of the text
    that precedes the version number, and anything following the minor
    version is ignored.

    RETURNS (Tuple[int, int]): The major and minor version numbers, or
        (0, 0) if no version number could be found in the output. (0, 0)
        compares lower than any real version, so version checks such as
        `get_git_version() >= (2, 22)` evaluate to False in that case
        instead of this function raising while parsing.
    """
    # Imported locally so the function does not rely on a module-level import.
    import re

    ret = _attempt_run_command(["git", "--version"])
    # Assumes the captured stdout is text (str), as the command is run with
    # capture=True by the helper.
    match = re.search(r"(\d+)\.(\d+)", ret.stdout)
    if match is None:
        return (0, 0)
    return (int(match.group(1)), int(match.group(2)))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def _attempt_run_command(cmd: Union[str, List[str]]):
 | 
			
		||||
    try:
 | 
			
		||||
        return run_command(cmd, capture=True)
 | 
			
		||||
    except subprocess.CalledProcessError as e:
 | 
			
		||||
        err = f"Could not run command: {cmd}."
 | 
			
		||||
        msg.fail(err, exits=1)
 | 
			
		||||
        err = f"Could not run command"
 | 
			
		||||
        msg.fail(err)
 | 
			
		||||
        print(cmd)
 | 
			
		||||
        sys.exit(1)
 | 
			
		||||
 | 
			
		||||
def _from_http_to_git(repo):
 | 
			
		||||
 | 
			
		||||
def _from_http_to_git(repo: str) -> str:
 | 
			
		||||
    if repo.startswith("http://"):
 | 
			
		||||
        repo = repo.replace(r"http://", r"https://")
 | 
			
		||||
    if repo.startswith(r"https://"):
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -27,7 +27,7 @@ def project_clone_cli(
 | 
			
		|||
    DOCS: https://nightly.spacy.io/api/cli#project-clone
 | 
			
		||||
    """
 | 
			
		||||
    if dest is None:
 | 
			
		||||
        dest = Path.cwd() / name
 | 
			
		||||
        dest = Path.cwd() / Path(name).parts[-1]
 | 
			
		||||
    project_clone(name, dest, repo=repo, branch=branch)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -648,7 +648,7 @@ def join_command(command: List[str]) -> str:
 | 
			
		|||
    return " ".join(shlex.quote(cmd) for cmd in command)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def run_command(command: Union[str, List[str]], *, capture=False, stdin=None) -> None:
 | 
			
		||||
def run_command(command: Union[str, List[str]], *, capture=False, stdin=None):
 | 
			
		||||
    """Run a command on the command line as a subprocess. If the subprocess
 | 
			
		||||
    returns a non-zero exit code, a system exit is performed.
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue
	
	Block a user