mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	Merge pull request #6048 from explosion/fix/clone-compat
This commit is contained in:
		
						commit
						f886f5bbc8
					
				| 
						 | 
					@ -1,4 +1,4 @@
 | 
				
			||||||
from typing import Dict, Any, Union, List, Optional, TYPE_CHECKING
 | 
					from typing import Dict, Any, Union, List, Optional, Tuple, TYPE_CHECKING
 | 
				
			||||||
import sys
 | 
					import sys
 | 
				
			||||||
import shutil
 | 
					import shutil
 | 
				
			||||||
from pathlib import Path
 | 
					from pathlib import Path
 | 
				
			||||||
| 
						 | 
					@ -321,41 +321,62 @@ def git_sparse_checkout(repo: str, subpath: str, dest: Path, *, branch: str = "m
 | 
				
			||||||
    # *that* we can do by path.
 | 
					    # *that* we can do by path.
 | 
				
			||||||
    # We're using Git and sparse checkout to only clone the files we need
 | 
					    # We're using Git and sparse checkout to only clone the files we need
 | 
				
			||||||
    with make_tempdir() as tmp_dir:
 | 
					    with make_tempdir() as tmp_dir:
 | 
				
			||||||
 | 
					        git_version = get_git_version()
 | 
				
			||||||
 | 
					        supports_sparse = git_version >= (2, 22)
 | 
				
			||||||
        # This is the "clone, but don't download anything" part.
 | 
					        # This is the "clone, but don't download anything" part.
 | 
				
			||||||
        cmd = (
 | 
					        cmd = f"git clone {repo} {tmp_dir} --no-checkout --depth 1 " f"-b {branch} "
 | 
				
			||||||
            f"git clone {repo} {tmp_dir} --no-checkout --depth 1 "
 | 
					        if supports_sparse:
 | 
				
			||||||
            f"--filter=blob:none "  # <-- The key bit
 | 
					            cmd += f"--filter=blob:none"  # <-- The key bit
 | 
				
			||||||
            f"-b {branch}"
 | 
					        else:
 | 
				
			||||||
        )
 | 
					            msg.warn(
 | 
				
			||||||
 | 
					                f"You're running an old version of Git (v{git_version[0]}.{git_version[1]}) "
 | 
				
			||||||
 | 
					                f"that doesn't fully support sparse checkout yet. This means that "
 | 
				
			||||||
 | 
					                f"more files than necessary may be downloaded temporarily. To "
 | 
				
			||||||
 | 
					                f"only download the files needed, upgrade to Git v2.22 or above."
 | 
				
			||||||
 | 
					            )
 | 
				
			||||||
        _attempt_run_command(cmd)
 | 
					        _attempt_run_command(cmd)
 | 
				
			||||||
        # Now we need to find the missing filenames for the subpath we want.
 | 
					        # Now we need to find the missing filenames for the subpath we want.
 | 
				
			||||||
        # Looking for this 'rev-list' command in the git --help? Hah.
 | 
					        # Looking for this 'rev-list' command in the git --help? Hah.
 | 
				
			||||||
        cmd = f"git -C {tmp_dir} rev-list --objects --all --missing=print -- {subpath}"
 | 
					        cmd = f"git -C {tmp_dir} rev-list --objects --all {'--missing=print ' if supports_sparse else ''} -- {subpath}"
 | 
				
			||||||
        ret = _attempt_run_command(cmd)
 | 
					        ret = _attempt_run_command(cmd)
 | 
				
			||||||
        git_repo = _from_http_to_git(repo)
 | 
					        git_repo = _from_http_to_git(repo)
 | 
				
			||||||
        # Now pass those missings into another bit of git internals
 | 
					        # Now pass those missings into another bit of git internals
 | 
				
			||||||
        missings = " ".join([x[1:] for x in ret.stdout.split() if x.startswith("?")])
 | 
					        missings = " ".join([x[1:] for x in ret.stdout.split() if x.startswith("?")])
 | 
				
			||||||
        if not missings:
 | 
					        if supports_sparse and not missings:
 | 
				
			||||||
           err = f"Could not find any relevant files for '{subpath}'. " \
 | 
					            err = (
 | 
				
			||||||
                 f"Did you specify a correct and complete path within repo '{repo}' " \
 | 
					                f"Could not find any relevant files for '{subpath}'. "
 | 
				
			||||||
                 f"and branch {branch}?"
 | 
					                f"Did you specify a correct and complete path within repo '{repo}' "
 | 
				
			||||||
           msg.fail(err, exits=1)
 | 
					                f"and branch {branch}?"
 | 
				
			||||||
        cmd = f"git -C {tmp_dir} fetch-pack {git_repo} {missings}"
 | 
					            )
 | 
				
			||||||
        _attempt_run_command(cmd)
 | 
					            msg.fail(err, exits=1)
 | 
				
			||||||
 | 
					        if supports_sparse:
 | 
				
			||||||
 | 
					            cmd = f"git -C {tmp_dir} fetch-pack {git_repo} {missings}"
 | 
				
			||||||
 | 
					            _attempt_run_command(cmd)
 | 
				
			||||||
        # And finally, we can checkout our subpath
 | 
					        # And finally, we can checkout our subpath
 | 
				
			||||||
        cmd = f"git -C {tmp_dir} checkout {branch} {subpath}"
 | 
					        cmd = f"git -C {tmp_dir} checkout {branch} {subpath}"
 | 
				
			||||||
        _attempt_run_command(cmd)
 | 
					        _attempt_run_command(cmd)
 | 
				
			||||||
        # We need Path(name) to make sure we also support subdirectories
 | 
					        # We need Path(name) to make sure we also support subdirectories
 | 
				
			||||||
        shutil.move(str(tmp_dir / Path(subpath)), str(dest))
 | 
					        shutil.move(str(tmp_dir / Path(subpath)), str(dest))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def _attempt_run_command(cmd):
 | 
					
 | 
				
			||||||
 | 
					def get_git_version() -> Tuple[int, int]:
 | 
				
			||||||
 | 
					    ret = _attempt_run_command(["git", "--version"])
 | 
				
			||||||
 | 
					    # TODO: this seems kinda brittle?
 | 
				
			||||||
 | 
					    version = ret.stdout[11:].strip().split(".")
 | 
				
			||||||
 | 
					    return (int(version[0]), int(version[1]))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def _attempt_run_command(cmd: Union[str, List[str]]):
 | 
				
			||||||
    try:
 | 
					    try:
 | 
				
			||||||
        return run_command(cmd, capture=True)
 | 
					        return run_command(cmd, capture=True)
 | 
				
			||||||
    except subprocess.CalledProcessError as e:
 | 
					    except subprocess.CalledProcessError as e:
 | 
				
			||||||
        err = f"Could not run command: {cmd}."
 | 
					        err = f"Could not run command"
 | 
				
			||||||
        msg.fail(err, exits=1)
 | 
					        msg.fail(err)
 | 
				
			||||||
 | 
					        print(cmd)
 | 
				
			||||||
 | 
					        sys.exit(1)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def _from_http_to_git(repo):
 | 
					
 | 
				
			||||||
 | 
					def _from_http_to_git(repo: str) -> str:
 | 
				
			||||||
    if repo.startswith("http://"):
 | 
					    if repo.startswith("http://"):
 | 
				
			||||||
        repo = repo.replace(r"http://", r"https://")
 | 
					        repo = repo.replace(r"http://", r"https://")
 | 
				
			||||||
    if repo.startswith(r"https://"):
 | 
					    if repo.startswith(r"https://"):
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -27,7 +27,7 @@ def project_clone_cli(
 | 
				
			||||||
    DOCS: https://nightly.spacy.io/api/cli#project-clone
 | 
					    DOCS: https://nightly.spacy.io/api/cli#project-clone
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    if dest is None:
 | 
					    if dest is None:
 | 
				
			||||||
        dest = Path.cwd() / name
 | 
					        dest = Path.cwd() / Path(name).parts[-1]
 | 
				
			||||||
    project_clone(name, dest, repo=repo, branch=branch)
 | 
					    project_clone(name, dest, repo=repo, branch=branch)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -648,7 +648,7 @@ def join_command(command: List[str]) -> str:
 | 
				
			||||||
    return " ".join(shlex.quote(cmd) for cmd in command)
 | 
					    return " ".join(shlex.quote(cmd) for cmd in command)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def run_command(command: Union[str, List[str]], *, capture=False, stdin=None) -> None:
 | 
					def run_command(command: Union[str, List[str]], *, capture=False, stdin=None):
 | 
				
			||||||
    """Run a command on the command line as a subprocess. If the subprocess
 | 
					    """Run a command on the command line as a subprocess. If the subprocess
 | 
				
			||||||
    returns a non-zero exit code, a system exit is performed.
 | 
					    returns a non-zero exit code, a system exit is performed.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user