Merge pull request #6094 from explosion/bugfix/run_process

This commit is contained in:
Ines Montani 2020-09-20 16:49:30 +02:00 committed by GitHub
commit 6898b35028
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 68 additions and 63 deletions

View File

@ -308,6 +308,31 @@ def git_checkout(
msg.fail("Destination of checkout must not exist", exits=1)
if not dest.parent.exists():
raise IOError("Parent of destination of checkout must exist")
if sparse and git_version >= (2, 22):
return git_sparse_checkout(repo, subpath, dest, branch)
elif sparse:
# Only show warnings if the user explicitly wants sparse checkout but
# the Git version doesn't support it
err_old = (
f"You're running an old version of Git (v{git_version[0]}.{git_version[1]}) "
f"that doesn't fully support sparse checkout yet."
)
err_unk = "You're running an unknown version of Git, so sparse checkout has been disabled."
msg.warn(
f"{err_unk if git_version == (0, 0) else err_old} "
f"This means that more files than necessary may be downloaded "
f"temporarily. To only download the files needed, make sure "
f"you're using Git v2.22 or above."
)
with make_tempdir() as tmp_dir:
cmd = f"git -C {tmp_dir} clone {repo} . -b {branch}"
ret = run_command(cmd, capture=True)
# We need Path(name) to make sure we also support subdirectories
shutil.copytree(str(tmp_dir / Path(subpath)), str(dest))
def git_sparse_checkout(repo, subpath, dest, branch):
# We're using Git, partial clone and sparse checkout to
# only clone the files we need
# This ends up being RIDICULOUS. omg.
@ -324,47 +349,31 @@ def git_checkout(
# *that* we can do by path.
# We're using Git and sparse checkout to only clone the files we need
with make_tempdir() as tmp_dir:
supports_sparse = git_version >= (2, 22)
use_sparse = supports_sparse and sparse
# This is the "clone, but don't download anything" part.
cmd = f"git clone {repo} {tmp_dir} --no-checkout --depth 1 " f"-b {branch} "
if use_sparse:
cmd += f"--filter=blob:none" # <-- The key bit
# Only show warnings if the user explicitly wants sparse checkout but
# the Git version doesn't support it
elif sparse:
err_old = (
f"You're running an old version of Git (v{git_version[0]}.{git_version[1]}) "
f"that doesn't fully support sparse checkout yet."
)
err_unk = "You're running an unknown version of Git, so sparse checkout has been disabled."
msg.warn(
f"{err_unk if git_version == (0, 0) else err_old} "
f"This means that more files than necessary may be downloaded "
f"temporarily. To only download the files needed, make sure "
f"you're using Git v2.22 or above."
)
try_run_command(cmd)
cmd = (
f"git clone {repo} {tmp_dir} --no-checkout --depth 1 "
f"-b {branch} --filter=blob:none"
)
run_command(cmd)
# Now we need to find the missing filenames for the subpath we want.
# Looking for this 'rev-list' command in the git --help? Hah.
cmd = f"git -C {tmp_dir} rev-list --objects --all {'--missing=print ' if use_sparse else ''} -- {subpath}"
ret = try_run_command(cmd)
cmd = f"git -C {tmp_dir} rev-list --objects --all --missing=print -- {subpath}"
ret = run_command(cmd, capture=True)
git_repo = _from_http_to_git(repo)
# Now pass those missings into another bit of git internals
missings = " ".join([x[1:] for x in ret.stdout.split() if x.startswith("?")])
if use_sparse and not missings:
if not missings:
err = (
f"Could not find any relevant files for '{subpath}'. "
f"Did you specify a correct and complete path within repo '{repo}' "
f"and branch {branch}?"
)
msg.fail(err, exits=1)
if use_sparse:
cmd = f"git -C {tmp_dir} fetch-pack {git_repo} {missings}"
try_run_command(cmd)
cmd = f"git -C {tmp_dir} fetch-pack {git_repo} {missings}"
run_command(cmd, capture=True)
# And finally, we can checkout our subpath
cmd = f"git -C {tmp_dir} checkout {branch} {subpath}"
try_run_command(cmd)
run_command(cmd, capture=True)
# We need Path(name) to make sure we also support subdirectories
shutil.move(str(tmp_dir / Path(subpath)), str(dest))
@ -378,7 +387,7 @@ def get_git_version(
RETURNS (Tuple[int, int]): The version as a (major, minor) tuple. Returns
(0, 0) if the version couldn't be determined.
"""
ret = try_run_command(["git", "--version"], error=error)
ret = run_command("git --version", capture=True)
stdout = ret.stdout.strip()
if not stdout or not stdout.startswith("git version"):
return (0, 0)
@ -386,23 +395,6 @@ def get_git_version(
return (int(version[0]), int(version[1]))
def try_run_command(
cmd: Union[str, List[str]], error: str = "Could not run command"
) -> subprocess.CompletedProcess:
"""Try running a command and raise an error if it fails.
cmd (Union[str, List[str]]): The command to run.
error (str): The error message.
RETURNS (CompletedProcess): The completed process if the command ran.
"""
try:
return run_command(cmd, capture=True)
except subprocess.CalledProcessError as e:
msg.fail(error)
print(cmd)
sys.exit(1)
def _from_http_to_git(repo: str) -> str:
if repo.startswith("http://"):
repo = repo.replace(r"http://", r"https://")

View File

@ -110,7 +110,7 @@ def package(
msg.good(f"Successfully created package '{model_name_v}'", main_path)
if create_sdist:
with util.working_dir(main_path):
util.run_command([sys.executable, "setup.py", "sdist"])
util.run_command([sys.executable, "setup.py", "sdist"], capture=False)
zip_file = main_path / "dist" / f"{model_name_v}.tar.gz"
msg.good(f"Successfully created zipped Python package", zip_file)

View File

@ -144,7 +144,7 @@ def run_commands(
if not silent:
print(f"Running command: {join_command(command)}")
if not dry:
run_command(command)
run_command(command, capture=False)
def validate_subcommand(

View File

@ -254,7 +254,7 @@ def load_vectors_into_model(
def load_vocab_data_into_model(
nlp: "Language", *, lookups: Optional["Lookups"]=None
nlp: "Language", *, lookups: Optional["Lookups"] = None
) -> None:
"""Load vocab data."""
if lookups:
@ -659,8 +659,8 @@ def join_command(command: List[str]) -> str:
def run_command(
command: Union[str, List[str]],
*,
capture: bool = False,
stdin: Optional[Any] = None,
capture: bool = False,
) -> Optional[subprocess.CompletedProcess]:
"""Run a command on the command line as a subprocess. If the subprocess
returns a non-zero exit code, a system exit is performed.
@ -668,33 +668,46 @@ def run_command(
command (str / List[str]): The command. If provided as a string, the
string will be split using shlex.split.
stdin (Optional[Any]): stdin to read from or None.
capture (bool): Whether to capture the output.
capture (bool): Whether to capture the output and errors. If False,
the stdout and stderr will not be redirected, and if there's an error,
sys.exit will be called with the returncode. You should use capture=False
when you want to turn over execution to the command, and capture=True
when you want to run the command more like a function.
RETURNS (Optional[CompletedProcess]): The process object.
"""
if isinstance(command, str):
command = split_command(command)
cmd_list = split_command(command)
cmd_str = command
else:
cmd_list = command
cmd_str = " ".join(command)
try:
ret = subprocess.run(
command,
cmd_list,
env=os.environ.copy(),
input=stdin,
encoding="utf8",
check=True,
check=False,
stdout=subprocess.PIPE if capture else None,
stderr=subprocess.PIPE if capture else None,
stderr=subprocess.STDOUT if capture else None,
)
except FileNotFoundError:
# Indicates the *command* wasn't found, it's an error before the command
# is run.
raise FileNotFoundError(
Errors.E970.format(str_command=" ".join(command), tool=command[0])
Errors.E970.format(str_command=cmd_str, tool=cmd_list[0])
) from None
except subprocess.CalledProcessError as e:
# We don't want a duplicate traceback here so we're making sure the
# CalledProcessError isn't re-raised. We also print both the string
# message and the stderr, in case the error only has one of them.
print(e.stderr)
print(e)
sys.exit(1)
if ret.returncode != 0:
if ret.returncode != 0 and capture:
message = f"Error running command:\n\n{cmd_str}\n\n"
message += f"Subprocess exited with status {ret.returncode}"
if ret.stdout is not None:
message += f"\n\nProcess log (stdout and stderr):\n\n"
message += ret.stdout
error = subprocess.SubprocessError(message)
error.ret = ret
error.command = cmd_str
raise error
elif ret.returncode != 0:
sys.exit(ret.returncode)
return ret