mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 01:04:34 +03:00
WIP: fix project clone compatibility
This commit is contained in:
parent
4fec8c39a3
commit
3e83a509bb
|
@ -1,4 +1,4 @@
|
||||||
from typing import Dict, Any, Union, List, Optional, TYPE_CHECKING
|
from typing import Dict, Any, Union, List, Optional, Tuple, TYPE_CHECKING
|
||||||
import sys
|
import sys
|
||||||
import shutil
|
import shutil
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
@ -321,25 +321,35 @@ def git_sparse_checkout(repo: str, subpath: str, dest: Path, *, branch: str = "m
|
||||||
# *that* we can do by path.
|
# *that* we can do by path.
|
||||||
# We're using Git and sparse checkout to only clone the files we need
|
# We're using Git and sparse checkout to only clone the files we need
|
||||||
with make_tempdir() as tmp_dir:
|
with make_tempdir() as tmp_dir:
|
||||||
|
git_version = get_git_version()
|
||||||
|
supports_sparse = git_version >= (2, 22)
|
||||||
# This is the "clone, but don't download anything" part.
|
# This is the "clone, but don't download anything" part.
|
||||||
cmd = (
|
cmd = f"git clone {repo} {tmp_dir} --no-checkout --depth 1 " f"-b {branch} "
|
||||||
f"git clone {repo} {tmp_dir} --no-checkout --depth 1 "
|
if supports_sparse:
|
||||||
f"--filter=blob:none " # <-- The key bit
|
cmd += f"--filter=blob:none" # <-- The key bit
|
||||||
f"-b {branch}"
|
else:
|
||||||
|
msg.warn(
|
||||||
|
f"You're running an old version of Git (v{git_version[0]}.{git_version[1]}) "
|
||||||
|
f"that doesn't fully support sparse checkout yet. This means that "
|
||||||
|
f"more files than necessary may be cloned. To only download the "
|
||||||
|
f"files needed, upgrade to Git v2.22 or above."
|
||||||
)
|
)
|
||||||
_attempt_run_command(cmd)
|
_attempt_run_command(cmd)
|
||||||
# Now we need to find the missing filenames for the subpath we want.
|
# Now we need to find the missing filenames for the subpath we want.
|
||||||
# Looking for this 'rev-list' command in the git --help? Hah.
|
# Looking for this 'rev-list' command in the git --help? Hah.
|
||||||
cmd = f"git -C {tmp_dir} rev-list --objects --all --missing=print -- {subpath}"
|
cmd = f"git -C {tmp_dir} rev-list --objects --all {'--missing=print ' if supports_sparse else ''} -- {subpath}"
|
||||||
ret = _attempt_run_command(cmd)
|
ret = _attempt_run_command(cmd)
|
||||||
git_repo = _from_http_to_git(repo)
|
git_repo = _from_http_to_git(repo)
|
||||||
# Now pass those missings into another bit of git internals
|
# Now pass those missings into another bit of git internals
|
||||||
missings = " ".join([x[1:] for x in ret.stdout.split() if x.startswith("?")])
|
missings = " ".join([x[1:] for x in ret.stdout.split() if x.startswith("?")])
|
||||||
if not missings:
|
if supports_sparse and not missings:
|
||||||
err = f"Could not find any relevant files for '{subpath}'. " \
|
err = (
|
||||||
f"Did you specify a correct and complete path within repo '{repo}' " \
|
f"Could not find any relevant files for '{subpath}'. "
|
||||||
|
f"Did you specify a correct and complete path within repo '{repo}' "
|
||||||
f"and branch {branch}?"
|
f"and branch {branch}?"
|
||||||
|
)
|
||||||
msg.fail(err, exits=1)
|
msg.fail(err, exits=1)
|
||||||
|
if supports_sparse:
|
||||||
cmd = f"git -C {tmp_dir} fetch-pack {git_repo} {missings}"
|
cmd = f"git -C {tmp_dir} fetch-pack {git_repo} {missings}"
|
||||||
_attempt_run_command(cmd)
|
_attempt_run_command(cmd)
|
||||||
# And finally, we can checkout our subpath
|
# And finally, we can checkout our subpath
|
||||||
|
@ -348,14 +358,25 @@ def git_sparse_checkout(repo: str, subpath: str, dest: Path, *, branch: str = "m
|
||||||
# We need Path(name) to make sure we also support subdirectories
|
# We need Path(name) to make sure we also support subdirectories
|
||||||
shutil.move(str(tmp_dir / Path(subpath)), str(dest))
|
shutil.move(str(tmp_dir / Path(subpath)), str(dest))
|
||||||
|
|
||||||
def _attempt_run_command(cmd):
|
|
||||||
|
def get_git_version() -> Tuple[int, int]:
    """Return the installed Git version as a ``(major, minor)`` tuple.

    Runs ``git --version`` through ``_attempt_run_command`` and extracts the
    first two numeric components of the reported version, e.g.
    ``"git version 2.24.3 (Apple Git-128)"`` yields ``(2, 24)``.

    RETURNS (Tuple[int, int]): The major and minor version. ``(0, 0)`` is
        returned when the output does not look like ``git version X.Y...``,
        so that a comparison such as ``version >= (2, 22)`` evaluates to
        False instead of the parse raising an IndexError / ValueError.
    """
    # Function-scope import: keeps this block self-contained without relying
    # on the module-level import list.
    import re

    ret = _attempt_run_command(["git", "--version"])
    # Anchor on the "git version" prefix and capture only the leading digits
    # of each component, rather than slicing at a fixed character offset and
    # calling int() on whatever follows the dots (suffixes like "-rc1" or
    # "(Apple Git-128)" would otherwise leak into the parsed parts).
    match = re.match(r"git version\s+(\d+)\.(\d+)", ret.stdout.strip())
    if match is None:
        return (0, 0)
    return (int(match.group(1)), int(match.group(2)))
|
||||||
|
|
||||||
|
|
||||||
|
def _attempt_run_command(cmd: Union[str, List[str]]):
|
||||||
try:
|
try:
|
||||||
return run_command(cmd, capture=True)
|
return run_command(cmd, capture=True)
|
||||||
except subprocess.CalledProcessError as e:
|
except subprocess.CalledProcessError as e:
|
||||||
err = f"Could not run command: {cmd}."
|
err = f"Could not run command"
|
||||||
msg.fail(err, exits=1)
|
msg.fail(err)
|
||||||
|
print(cmd)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
def _from_http_to_git(repo):
|
|
||||||
|
def _from_http_to_git(repo: str) -> str:
|
||||||
if repo.startswith("http://"):
|
if repo.startswith("http://"):
|
||||||
repo = repo.replace(r"http://", r"https://")
|
repo = repo.replace(r"http://", r"https://")
|
||||||
if repo.startswith(r"https://"):
|
if repo.startswith(r"https://"):
|
||||||
|
|
|
@ -27,7 +27,7 @@ def project_clone_cli(
|
||||||
DOCS: https://nightly.spacy.io/api/cli#project-clone
|
DOCS: https://nightly.spacy.io/api/cli#project-clone
|
||||||
"""
|
"""
|
||||||
if dest is None:
|
if dest is None:
|
||||||
dest = Path.cwd() / name
|
dest = Path.cwd() / Path(name).parts[-1]
|
||||||
project_clone(name, dest, repo=repo, branch=branch)
|
project_clone(name, dest, repo=repo, branch=branch)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -648,7 +648,7 @@ def join_command(command: List[str]) -> str:
|
||||||
return " ".join(shlex.quote(cmd) for cmd in command)
|
return " ".join(shlex.quote(cmd) for cmd in command)
|
||||||
|
|
||||||
|
|
||||||
def run_command(command: Union[str, List[str]], *, capture=False, stdin=None) -> None:
|
def run_command(command: Union[str, List[str]], *, capture=False, stdin=None):
|
||||||
"""Run a command on the command line as a subprocess. If the subprocess
|
"""Run a command on the command line as a subprocess. If the subprocess
|
||||||
returns a non-zero exit code, a system exit is performed.
|
returns a non-zero exit code, a system exit is performed.
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user