Merge pull request #6094 from explosion/bugfix/run_process

This commit is contained in:
Ines Montani 2020-09-20 16:49:30 +02:00 committed by GitHub
commit 6898b35028
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 68 additions and 63 deletions

View File

@ -308,6 +308,31 @@ def git_checkout(
msg.fail("Destination of checkout must not exist", exits=1) msg.fail("Destination of checkout must not exist", exits=1)
if not dest.parent.exists(): if not dest.parent.exists():
raise IOError("Parent of destination of checkout must exist") raise IOError("Parent of destination of checkout must exist")
if sparse and git_version >= (2, 22):
return git_sparse_checkout(repo, subpath, dest, branch)
elif sparse:
# Only show warnings if the user explicitly wants sparse checkout but
# the Git version doesn't support it
err_old = (
f"You're running an old version of Git (v{git_version[0]}.{git_version[1]}) "
f"that doesn't fully support sparse checkout yet."
)
err_unk = "You're running an unknown version of Git, so sparse checkout has been disabled."
msg.warn(
f"{err_unk if git_version == (0, 0) else err_old} "
f"This means that more files than necessary may be downloaded "
f"temporarily. To only download the files needed, make sure "
f"you're using Git v2.22 or above."
)
with make_tempdir() as tmp_dir:
cmd = f"git -C {tmp_dir} clone {repo} . -b {branch}"
ret = run_command(cmd, capture=True)
# We need Path(name) to make sure we also support subdirectories
shutil.copytree(str(tmp_dir / Path(subpath)), str(dest))
def git_sparse_checkout(repo, subpath, dest, branch):
# We're using Git, partial clone and sparse checkout to # We're using Git, partial clone and sparse checkout to
# only clone the files we need # only clone the files we need
# This ends up being RIDICULOUS. omg. # This ends up being RIDICULOUS. omg.
@ -324,47 +349,31 @@ def git_checkout(
# *that* we can do by path. # *that* we can do by path.
# We're using Git and sparse checkout to only clone the files we need # We're using Git and sparse checkout to only clone the files we need
with make_tempdir() as tmp_dir: with make_tempdir() as tmp_dir:
supports_sparse = git_version >= (2, 22)
use_sparse = supports_sparse and sparse
# This is the "clone, but don't download anything" part. # This is the "clone, but don't download anything" part.
cmd = f"git clone {repo} {tmp_dir} --no-checkout --depth 1 " f"-b {branch} " cmd = (
if use_sparse: f"git clone {repo} {tmp_dir} --no-checkout --depth 1 "
cmd += f"--filter=blob:none" # <-- The key bit f"-b {branch} --filter=blob:none"
# Only show warnings if the user explicitly wants sparse checkout but
# the Git version doesn't support it
elif sparse:
err_old = (
f"You're running an old version of Git (v{git_version[0]}.{git_version[1]}) "
f"that doesn't fully support sparse checkout yet."
) )
err_unk = "You're running an unknown version of Git, so sparse checkout has been disabled." run_command(cmd)
msg.warn(
f"{err_unk if git_version == (0, 0) else err_old} "
f"This means that more files than necessary may be downloaded "
f"temporarily. To only download the files needed, make sure "
f"you're using Git v2.22 or above."
)
try_run_command(cmd)
# Now we need to find the missing filenames for the subpath we want. # Now we need to find the missing filenames for the subpath we want.
# Looking for this 'rev-list' command in the git --help? Hah. # Looking for this 'rev-list' command in the git --help? Hah.
cmd = f"git -C {tmp_dir} rev-list --objects --all {'--missing=print ' if use_sparse else ''} -- {subpath}" cmd = f"git -C {tmp_dir} rev-list --objects --all --missing=print -- {subpath}"
ret = try_run_command(cmd) ret = run_command(cmd, capture=True)
git_repo = _from_http_to_git(repo) git_repo = _from_http_to_git(repo)
# Now pass those missings into another bit of git internals # Now pass those missings into another bit of git internals
missings = " ".join([x[1:] for x in ret.stdout.split() if x.startswith("?")]) missings = " ".join([x[1:] for x in ret.stdout.split() if x.startswith("?")])
if use_sparse and not missings: if not missings:
err = ( err = (
f"Could not find any relevant files for '{subpath}'. " f"Could not find any relevant files for '{subpath}'. "
f"Did you specify a correct and complete path within repo '{repo}' " f"Did you specify a correct and complete path within repo '{repo}' "
f"and branch {branch}?" f"and branch {branch}?"
) )
msg.fail(err, exits=1) msg.fail(err, exits=1)
if use_sparse:
cmd = f"git -C {tmp_dir} fetch-pack {git_repo} {missings}" cmd = f"git -C {tmp_dir} fetch-pack {git_repo} {missings}"
try_run_command(cmd) run_command(cmd, capture=True)
# And finally, we can checkout our subpath # And finally, we can checkout our subpath
cmd = f"git -C {tmp_dir} checkout {branch} {subpath}" cmd = f"git -C {tmp_dir} checkout {branch} {subpath}"
try_run_command(cmd) run_command(cmd, capture=True)
# We need Path(name) to make sure we also support subdirectories # We need Path(name) to make sure we also support subdirectories
shutil.move(str(tmp_dir / Path(subpath)), str(dest)) shutil.move(str(tmp_dir / Path(subpath)), str(dest))
@ -378,7 +387,7 @@ def get_git_version(
RETURNS (Tuple[int, int]): The version as a (major, minor) tuple. Returns RETURNS (Tuple[int, int]): The version as a (major, minor) tuple. Returns
(0, 0) if the version couldn't be determined. (0, 0) if the version couldn't be determined.
""" """
ret = try_run_command(["git", "--version"], error=error) ret = run_command("git --version", capture=True)
stdout = ret.stdout.strip() stdout = ret.stdout.strip()
if not stdout or not stdout.startswith("git version"): if not stdout or not stdout.startswith("git version"):
return (0, 0) return (0, 0)
@ -386,23 +395,6 @@ def get_git_version(
return (int(version[0]), int(version[1])) return (int(version[0]), int(version[1]))
def try_run_command(
cmd: Union[str, List[str]], error: str = "Could not run command"
) -> subprocess.CompletedProcess:
"""Try running a command and raise an error if it fails.
cmd (Union[str, List[str]]): The command to run.
error (str): The error message.
RETURNS (CompletedProcess): The completed process if the command ran.
"""
try:
return run_command(cmd, capture=True)
except subprocess.CalledProcessError as e:
msg.fail(error)
print(cmd)
sys.exit(1)
def _from_http_to_git(repo: str) -> str: def _from_http_to_git(repo: str) -> str:
if repo.startswith("http://"): if repo.startswith("http://"):
repo = repo.replace(r"http://", r"https://") repo = repo.replace(r"http://", r"https://")

View File

@ -110,7 +110,7 @@ def package(
msg.good(f"Successfully created package '{model_name_v}'", main_path) msg.good(f"Successfully created package '{model_name_v}'", main_path)
if create_sdist: if create_sdist:
with util.working_dir(main_path): with util.working_dir(main_path):
util.run_command([sys.executable, "setup.py", "sdist"]) util.run_command([sys.executable, "setup.py", "sdist"], capture=False)
zip_file = main_path / "dist" / f"{model_name_v}.tar.gz" zip_file = main_path / "dist" / f"{model_name_v}.tar.gz"
msg.good(f"Successfully created zipped Python package", zip_file) msg.good(f"Successfully created zipped Python package", zip_file)

View File

@ -144,7 +144,7 @@ def run_commands(
if not silent: if not silent:
print(f"Running command: {join_command(command)}") print(f"Running command: {join_command(command)}")
if not dry: if not dry:
run_command(command) run_command(command, capture=False)
def validate_subcommand( def validate_subcommand(

View File

@ -254,7 +254,7 @@ def load_vectors_into_model(
def load_vocab_data_into_model( def load_vocab_data_into_model(
nlp: "Language", *, lookups: Optional["Lookups"]=None nlp: "Language", *, lookups: Optional["Lookups"] = None
) -> None: ) -> None:
"""Load vocab data.""" """Load vocab data."""
if lookups: if lookups:
@ -659,8 +659,8 @@ def join_command(command: List[str]) -> str:
def run_command( def run_command(
command: Union[str, List[str]], command: Union[str, List[str]],
*, *,
capture: bool = False,
stdin: Optional[Any] = None, stdin: Optional[Any] = None,
capture: bool = False,
) -> Optional[subprocess.CompletedProcess]: ) -> Optional[subprocess.CompletedProcess]:
"""Run a command on the command line as a subprocess. If the subprocess """Run a command on the command line as a subprocess. If the subprocess
returns a non-zero exit code, a system exit is performed. returns a non-zero exit code, a system exit is performed.
@ -668,33 +668,46 @@ def run_command(
command (str / List[str]): The command. If provided as a string, the command (str / List[str]): The command. If provided as a string, the
string will be split using shlex.split. string will be split using shlex.split.
stdin (Optional[Any]): stdin to read from or None. stdin (Optional[Any]): stdin to read from or None.
capture (bool): Whether to capture the output. capture (bool): Whether to capture the output and errors. If False,
the stdout and stderr will not be redirected, and if there's an error,
sys.exit will be called with the returncode. You should use capture=False
when you want to turn over execution to the command, and capture=True
when you want to run the command more like a function.
RETURNS (Optional[CompletedProcess]): The process object. RETURNS (Optional[CompletedProcess]): The process object.
""" """
if isinstance(command, str): if isinstance(command, str):
command = split_command(command) cmd_list = split_command(command)
cmd_str = command
else:
cmd_list = command
cmd_str = " ".join(command)
try: try:
ret = subprocess.run( ret = subprocess.run(
command, cmd_list,
env=os.environ.copy(), env=os.environ.copy(),
input=stdin, input=stdin,
encoding="utf8", encoding="utf8",
check=True, check=False,
stdout=subprocess.PIPE if capture else None, stdout=subprocess.PIPE if capture else None,
stderr=subprocess.PIPE if capture else None, stderr=subprocess.STDOUT if capture else None,
) )
except FileNotFoundError: except FileNotFoundError:
# Indicates the *command* wasn't found, it's an error before the command
# is run.
raise FileNotFoundError( raise FileNotFoundError(
Errors.E970.format(str_command=" ".join(command), tool=command[0]) Errors.E970.format(str_command=cmd_str, tool=cmd_list[0])
) from None ) from None
except subprocess.CalledProcessError as e: if ret.returncode != 0 and capture:
# We don't want a duplicate traceback here so we're making sure the message = f"Error running command:\n\n{cmd_str}\n\n"
# CalledProcessError isn't re-raised. We also print both the string message += f"Subprocess exited with status {ret.returncode}"
# message and the stderr, in case the error only has one of them. if ret.stdout is not None:
print(e.stderr) message += f"\n\nProcess log (stdout and stderr):\n\n"
print(e) message += ret.stdout
sys.exit(1) error = subprocess.SubprocessError(message)
if ret.returncode != 0: error.ret = ret
error.command = cmd_str
raise error
elif ret.returncode != 0:
sys.exit(ret.returncode) sys.exit(ret.returncode)
return ret return ret