mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 18:26:30 +03:00
Merge pull request #6094 from explosion/bugfix/run_process
This commit is contained in:
commit
6898b35028
|
@ -308,6 +308,31 @@ def git_checkout(
|
|||
msg.fail("Destination of checkout must not exist", exits=1)
|
||||
if not dest.parent.exists():
|
||||
raise IOError("Parent of destination of checkout must exist")
|
||||
|
||||
if sparse and git_version >= (2, 22):
|
||||
return git_sparse_checkout(repo, subpath, dest, branch)
|
||||
elif sparse:
|
||||
# Only show warnings if the user explicitly wants sparse checkout but
|
||||
# the Git version doesn't support it
|
||||
err_old = (
|
||||
f"You're running an old version of Git (v{git_version[0]}.{git_version[1]}) "
|
||||
f"that doesn't fully support sparse checkout yet."
|
||||
)
|
||||
err_unk = "You're running an unknown version of Git, so sparse checkout has been disabled."
|
||||
msg.warn(
|
||||
f"{err_unk if git_version == (0, 0) else err_old} "
|
||||
f"This means that more files than necessary may be downloaded "
|
||||
f"temporarily. To only download the files needed, make sure "
|
||||
f"you're using Git v2.22 or above."
|
||||
)
|
||||
with make_tempdir() as tmp_dir:
|
||||
cmd = f"git -C {tmp_dir} clone {repo} . -b {branch}"
|
||||
ret = run_command(cmd, capture=True)
|
||||
# We need Path(name) to make sure we also support subdirectories
|
||||
shutil.copytree(str(tmp_dir / Path(subpath)), str(dest))
|
||||
|
||||
|
||||
def git_sparse_checkout(repo, subpath, dest, branch):
|
||||
# We're using Git, partial clone and sparse checkout to
|
||||
# only clone the files we need
|
||||
# This ends up being RIDICULOUS. omg.
|
||||
|
@ -324,47 +349,31 @@ def git_checkout(
|
|||
# *that* we can do by path.
|
||||
# We're using Git and sparse checkout to only clone the files we need
|
||||
with make_tempdir() as tmp_dir:
|
||||
supports_sparse = git_version >= (2, 22)
|
||||
use_sparse = supports_sparse and sparse
|
||||
# This is the "clone, but don't download anything" part.
|
||||
cmd = f"git clone {repo} {tmp_dir} --no-checkout --depth 1 " f"-b {branch} "
|
||||
if use_sparse:
|
||||
cmd += f"--filter=blob:none" # <-- The key bit
|
||||
# Only show warnings if the user explicitly wants sparse checkout but
|
||||
# the Git version doesn't support it
|
||||
elif sparse:
|
||||
err_old = (
|
||||
f"You're running an old version of Git (v{git_version[0]}.{git_version[1]}) "
|
||||
f"that doesn't fully support sparse checkout yet."
|
||||
)
|
||||
err_unk = "You're running an unknown version of Git, so sparse checkout has been disabled."
|
||||
msg.warn(
|
||||
f"{err_unk if git_version == (0, 0) else err_old} "
|
||||
f"This means that more files than necessary may be downloaded "
|
||||
f"temporarily. To only download the files needed, make sure "
|
||||
f"you're using Git v2.22 or above."
|
||||
)
|
||||
try_run_command(cmd)
|
||||
cmd = (
|
||||
f"git clone {repo} {tmp_dir} --no-checkout --depth 1 "
|
||||
f"-b {branch} --filter=blob:none"
|
||||
)
|
||||
run_command(cmd)
|
||||
# Now we need to find the missing filenames for the subpath we want.
|
||||
# Looking for this 'rev-list' command in the git --help? Hah.
|
||||
cmd = f"git -C {tmp_dir} rev-list --objects --all {'--missing=print ' if use_sparse else ''} -- {subpath}"
|
||||
ret = try_run_command(cmd)
|
||||
cmd = f"git -C {tmp_dir} rev-list --objects --all --missing=print -- {subpath}"
|
||||
ret = run_command(cmd, capture=True)
|
||||
git_repo = _from_http_to_git(repo)
|
||||
# Now pass those missings into another bit of git internals
|
||||
missings = " ".join([x[1:] for x in ret.stdout.split() if x.startswith("?")])
|
||||
if use_sparse and not missings:
|
||||
if not missings:
|
||||
err = (
|
||||
f"Could not find any relevant files for '{subpath}'. "
|
||||
f"Did you specify a correct and complete path within repo '{repo}' "
|
||||
f"and branch {branch}?"
|
||||
)
|
||||
msg.fail(err, exits=1)
|
||||
if use_sparse:
|
||||
cmd = f"git -C {tmp_dir} fetch-pack {git_repo} {missings}"
|
||||
try_run_command(cmd)
|
||||
cmd = f"git -C {tmp_dir} fetch-pack {git_repo} {missings}"
|
||||
run_command(cmd, capture=True)
|
||||
# And finally, we can checkout our subpath
|
||||
cmd = f"git -C {tmp_dir} checkout {branch} {subpath}"
|
||||
try_run_command(cmd)
|
||||
run_command(cmd, capture=True)
|
||||
# We need Path(name) to make sure we also support subdirectories
|
||||
shutil.move(str(tmp_dir / Path(subpath)), str(dest))
|
||||
|
||||
|
@ -378,7 +387,7 @@ def get_git_version(
|
|||
RETURNS (Tuple[int, int]): The version as a (major, minor) tuple. Returns
|
||||
(0, 0) if the version couldn't be determined.
|
||||
"""
|
||||
ret = try_run_command(["git", "--version"], error=error)
|
||||
ret = run_command("git --version", capture=True)
|
||||
stdout = ret.stdout.strip()
|
||||
if not stdout or not stdout.startswith("git version"):
|
||||
return (0, 0)
|
||||
|
@ -386,23 +395,6 @@ def get_git_version(
|
|||
return (int(version[0]), int(version[1]))
|
||||
|
||||
|
||||
def try_run_command(
|
||||
cmd: Union[str, List[str]], error: str = "Could not run command"
|
||||
) -> subprocess.CompletedProcess:
|
||||
"""Try running a command and raise an error if it fails.
|
||||
|
||||
cmd (Union[str, List[str]]): The command to run.
|
||||
error (str): The error message.
|
||||
RETURNS (CompletedProcess): The completed process if the command ran.
|
||||
"""
|
||||
try:
|
||||
return run_command(cmd, capture=True)
|
||||
except subprocess.CalledProcessError as e:
|
||||
msg.fail(error)
|
||||
print(cmd)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def _from_http_to_git(repo: str) -> str:
|
||||
if repo.startswith("http://"):
|
||||
repo = repo.replace(r"http://", r"https://")
|
||||
|
|
|
@ -110,7 +110,7 @@ def package(
|
|||
msg.good(f"Successfully created package '{model_name_v}'", main_path)
|
||||
if create_sdist:
|
||||
with util.working_dir(main_path):
|
||||
util.run_command([sys.executable, "setup.py", "sdist"])
|
||||
util.run_command([sys.executable, "setup.py", "sdist"], capture=False)
|
||||
zip_file = main_path / "dist" / f"{model_name_v}.tar.gz"
|
||||
msg.good(f"Successfully created zipped Python package", zip_file)
|
||||
|
||||
|
|
|
@ -144,7 +144,7 @@ def run_commands(
|
|||
if not silent:
|
||||
print(f"Running command: {join_command(command)}")
|
||||
if not dry:
|
||||
run_command(command)
|
||||
run_command(command, capture=False)
|
||||
|
||||
|
||||
def validate_subcommand(
|
||||
|
|
|
@ -254,7 +254,7 @@ def load_vectors_into_model(
|
|||
|
||||
|
||||
def load_vocab_data_into_model(
|
||||
nlp: "Language", *, lookups: Optional["Lookups"]=None
|
||||
nlp: "Language", *, lookups: Optional["Lookups"] = None
|
||||
) -> None:
|
||||
"""Load vocab data."""
|
||||
if lookups:
|
||||
|
@ -659,8 +659,8 @@ def join_command(command: List[str]) -> str:
|
|||
def run_command(
|
||||
command: Union[str, List[str]],
|
||||
*,
|
||||
capture: bool = False,
|
||||
stdin: Optional[Any] = None,
|
||||
capture: bool = False,
|
||||
) -> Optional[subprocess.CompletedProcess]:
|
||||
"""Run a command on the command line as a subprocess. If the subprocess
|
||||
returns a non-zero exit code, a system exit is performed when capture is False; when capture is True, a subprocess.SubprocessError is raised instead.
|
||||
|
@ -668,33 +668,46 @@ def run_command(
|
|||
command (str / List[str]): The command. If provided as a string, the
|
||||
string will be split using shlex.split.
|
||||
stdin (Optional[Any]): stdin to read from or None.
|
||||
capture (bool): Whether to capture the output.
|
||||
capture (bool): Whether to capture the output and errors. If False,
|
||||
the stdout and stderr will not be redirected, and if there's an error,
|
||||
sys.exit will be called with the returncode. You should use capture=False
|
||||
when you want to turn over execution to the command, and capture=True
|
||||
when you want to run the command more like a function.
|
||||
RETURNS (Optional[CompletedProcess]): The process object.
|
||||
"""
|
||||
if isinstance(command, str):
|
||||
command = split_command(command)
|
||||
cmd_list = split_command(command)
|
||||
cmd_str = command
|
||||
else:
|
||||
cmd_list = command
|
||||
cmd_str = " ".join(command)
|
||||
try:
|
||||
ret = subprocess.run(
|
||||
command,
|
||||
cmd_list,
|
||||
env=os.environ.copy(),
|
||||
input=stdin,
|
||||
encoding="utf8",
|
||||
check=True,
|
||||
check=False,
|
||||
stdout=subprocess.PIPE if capture else None,
|
||||
stderr=subprocess.PIPE if capture else None,
|
||||
stderr=subprocess.STDOUT if capture else None,
|
||||
)
|
||||
except FileNotFoundError:
|
||||
# Indicates the *command* wasn't found, it's an error before the command
|
||||
# is run.
|
||||
raise FileNotFoundError(
|
||||
Errors.E970.format(str_command=" ".join(command), tool=command[0])
|
||||
Errors.E970.format(str_command=cmd_str, tool=cmd_list[0])
|
||||
) from None
|
||||
except subprocess.CalledProcessError as e:
|
||||
# We don't want a duplicate traceback here so we're making sure the
|
||||
# CalledProcessError isn't re-raised. We also print both the string
|
||||
# message and the stderr, in case the error only has one of them.
|
||||
print(e.stderr)
|
||||
print(e)
|
||||
sys.exit(1)
|
||||
if ret.returncode != 0:
|
||||
if ret.returncode != 0 and capture:
|
||||
message = f"Error running command:\n\n{cmd_str}\n\n"
|
||||
message += f"Subprocess exited with status {ret.returncode}"
|
||||
if ret.stdout is not None:
|
||||
message += f"\n\nProcess log (stdout and stderr):\n\n"
|
||||
message += ret.stdout
|
||||
error = subprocess.SubprocessError(message)
|
||||
error.ret = ret
|
||||
error.command = cmd_str
|
||||
raise error
|
||||
elif ret.returncode != 0:
|
||||
sys.exit(ret.returncode)
|
||||
return ret
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user