Merge pull request #6094 from explosion/bugfix/run_process

2025-07-11 00:32:40 +03:00 · 2020-09-20 16:49:30 +02:00 · 2020-09-20 16:49:30 +02:00 · 6898b35028
commit 6898b35028
parent 744f259b9c 8fb59d958c
4 changed files with 68 additions and 63 deletions
--- a/spacy/cli/_util.py
+++ b/spacy/cli/_util.py
@ -308,6 +308,31 @@ def git_checkout(
        msg.fail("Destination of checkout must not exist", exits=1)
    if not dest.parent.exists():
        raise IOError("Parent of destination of checkout must exist")
    if sparse and git_version >= (2, 22):
        return git_sparse_checkout(repo, subpath, dest, branch)
    elif sparse:
        # Only show warnings if the user explicitly wants sparse checkout but
        # the Git version doesn't support it
        err_old = (
            f"You're running an old version of Git (v{git_version[0]}.{git_version[1]}) "
            f"that doesn't fully support sparse checkout yet."
        )
        err_unk = "You're running an unknown version of Git, so sparse checkout has been disabled."
        msg.warn(
            f"{err_unk if git_version == (0, 0) else err_old} "
            f"This means that more files than necessary may be downloaded "
            f"temporarily. To only download the files needed, make sure "
            f"you're using Git v2.22 or above."
        )
    with make_tempdir() as tmp_dir:
        cmd = f"git -C {tmp_dir} clone {repo} . -b {branch}"
        ret = run_command(cmd, capture=True)
        # We need Path(name) to make sure we also support subdirectories
        shutil.copytree(str(tmp_dir / Path(subpath)), str(dest))
 def git_sparse_checkout(repo, subpath, dest, branch):
    # We're using Git, partial clone and sparse checkout to
    # only clone the files we need
    # This ends up being RIDICULOUS. omg.
@ -324,47 +349,31 @@ def git_checkout(
    # *that* we can do by path.
    # We're using Git and sparse checkout to only clone the files we need
    with make_tempdir() as tmp_dir:
        supports_sparse = git_version >= (2, 22)
        use_sparse = supports_sparse and sparse
        # This is the "clone, but don't download anything" part.
-        cmd = f"git clone {repo} {tmp_dir} --no-checkout --depth 1 " f"-b {branch} "
+        cmd = (
-        if use_sparse:
+            f"git clone {repo} {tmp_dir} --no-checkout --depth 1 "
-            cmd += f"--filter=blob:none"  # <-- The key bit
+            f"-b {branch} --filter=blob:none"
        # Only show warnings if the user explicitly wants sparse checkout but
        # the Git version doesn't support it
        elif sparse:
            err_old = (
                f"You're running an old version of Git (v{git_version[0]}.{git_version[1]}) "
                f"that doesn't fully support sparse checkout yet."
        )
-            err_unk = "You're running an unknown version of Git, so sparse checkout has been disabled."
+        run_command(cmd)
            msg.warn(
                f"{err_unk if git_version == (0, 0) else err_old} "
                f"This means that more files than necessary may be downloaded "
                f"temporarily. To only download the files needed, make sure "
                f"you're using Git v2.22 or above."
            )
        try_run_command(cmd)
        # Now we need to find the missing filenames for the subpath we want.
        # Looking for this 'rev-list' command in the git --help? Hah.
-        cmd = f"git -C {tmp_dir} rev-list --objects --all {'--missing=print ' if use_sparse else ''} -- {subpath}"
+        cmd = f"git -C {tmp_dir} rev-list --objects --all --missing=print -- {subpath}"
-        ret = try_run_command(cmd)
+        ret = run_command(cmd, capture=True)
        git_repo = _from_http_to_git(repo)
        # Now pass those missings into another bit of git internals
        missings = " ".join([x[1:] for x in ret.stdout.split() if x.startswith("?")])
-        if use_sparse and not missings:
+        if not missings:
            err = (
                f"Could not find any relevant files for '{subpath}'. "
                f"Did you specify a correct and complete path within repo '{repo}' "
                f"and branch {branch}?"
            )
            msg.fail(err, exits=1)
        if use_sparse:
        cmd = f"git -C {tmp_dir} fetch-pack {git_repo} {missings}"
-            try_run_command(cmd)
+        run_command(cmd, capture=True)
        # And finally, we can checkout our subpath
        cmd = f"git -C {tmp_dir} checkout {branch} {subpath}"
-        try_run_command(cmd)
+        run_command(cmd, capture=True)
        # We need Path(name) to make sure we also support subdirectories
        shutil.move(str(tmp_dir / Path(subpath)), str(dest))
@ -378,7 +387,7 @@ def get_git_version(
    RETURNS (Tuple[int, int]): The version as a (major, minor) tuple. Returns
        (0, 0) if the version couldn't be determined.
    """
-    ret = try_run_command(["git", "--version"], error=error)
+    ret = run_command("git --version", capture=True)
    stdout = ret.stdout.strip()
    if not stdout or not stdout.startswith("git version"):
        return (0, 0)
@ -386,23 +395,6 @@ def get_git_version(
    return (int(version[0]), int(version[1]))
 def try_run_command(
    cmd: Union[str, List[str]], error: str = "Could not run command"
 ) -> subprocess.CompletedProcess:
    """Run a command with captured output and exit the process if it fails.
    The command is passed to run_command with capture=True. On failure the
    error message is reported via msg.fail, the command is printed and
    sys.exit(1) is called, so no exception reaches the caller.
    cmd (Union[str, List[str]]): The command to run.
    error (str): The error message to report before exiting.
    RETURNS (CompletedProcess): The completed process if the command ran.
    """
    # Catch the SubprocessError base class rather than CalledProcessError only:
    # it still covers CalledProcessError, and it also covers the plain
    # SubprocessError raised for a non-zero exit status in capture mode.
    try:
        return run_command(cmd, capture=True)
    except subprocess.SubprocessError:
        msg.fail(error)
        # Echo the command so the user can see what failed.
        print(cmd)
        sys.exit(1)
 def _from_http_to_git(repo: str) -> str:
    if repo.startswith("http://"):
        repo = repo.replace(r"http://", r"https://")
--- a/spacy/cli/package.py
+++ b/spacy/cli/package.py
@ -110,7 +110,7 @@ def package(
    msg.good(f"Successfully created package '{model_name_v}'", main_path)
    if create_sdist:
        with util.working_dir(main_path):
-            util.run_command([sys.executable, "setup.py", "sdist"])
+            util.run_command([sys.executable, "setup.py", "sdist"], capture=False)
        zip_file = main_path / "dist" / f"{model_name_v}.tar.gz"
        msg.good(f"Successfully created zipped Python package", zip_file)
--- a/spacy/cli/project/run.py
+++ b/spacy/cli/project/run.py
@ -144,7 +144,7 @@ def run_commands(
        if not silent:
            print(f"Running command: {join_command(command)}")
        if not dry:
-            run_command(command)
+            run_command(command, capture=False)
 def validate_subcommand(
--- a/spacy/util.py
+++ b/spacy/util.py
@ -254,7 +254,7 @@ def load_vectors_into_model(
 def load_vocab_data_into_model(
-    nlp: "Language", *, lookups: Optional["Lookups"]=None
+    nlp: "Language", *, lookups: Optional["Lookups"] = None
 ) -> None:
    """Load vocab data."""
    if lookups:
@ -659,8 +659,8 @@ def join_command(command: List[str]) -> str:
 def run_command(
    command: Union[str, List[str]],
    *,
    capture: bool = False,
    stdin: Optional[Any] = None,
    capture: bool = False,
 ) -> Optional[subprocess.CompletedProcess]:
    """Run a command on the command line as a subprocess. If the subprocess
    returns a non-zero exit code, a system exit is performed.
@ -668,33 +668,46 @@ def run_command(
    command (str / List[str]): The command. If provided as a string, the
        string will be split using shlex.split.
    stdin (Optional[Any]): stdin to read from or None.
-    capture (bool): Whether to capture the output.
+    capture (bool): Whether to capture the output and errors. If False,
        the stdout and stderr will not be redirected, and if there's an error,
        sys.exit will be called with the returncode. You should use capture=False
        when you want to turn over execution to the command, and capture=True
        when you want to run the command more like a function.
    RETURNS (Optional[CompletedProcess]): The process object.
    """
    if isinstance(command, str):
-        command = split_command(command)
+        cmd_list = split_command(command)
        cmd_str = command
    else:
        cmd_list = command
        cmd_str = " ".join(command)
    try:
        ret = subprocess.run(
-            command,
+            cmd_list,
            env=os.environ.copy(),
            input=stdin,
            encoding="utf8",
-            check=True,
+            check=False,
            stdout=subprocess.PIPE if capture else None,
-            stderr=subprocess.PIPE if capture else None,
+            stderr=subprocess.STDOUT if capture else None,
        )
    except FileNotFoundError:
        # Indicates the *command* wasn't found, it's an error before the command
        # is run.
        raise FileNotFoundError(
-            Errors.E970.format(str_command=" ".join(command), tool=command[0])
+            Errors.E970.format(str_command=cmd_str, tool=cmd_list[0])
        ) from None
-    except subprocess.CalledProcessError as e:
+    if ret.returncode != 0 and capture:
-        # We don't want a duplicate traceback here so we're making sure the
+        message = f"Error running command:\n\n{cmd_str}\n\n"
-        # CalledProcessError isn't re-raised. We also print both the string
+        message += f"Subprocess exited with status {ret.returncode}"
-        # message and the stderr, in case the error only has one of them.
+        if ret.stdout is not None:
-        print(e.stderr)
+            message += f"\n\nProcess log (stdout and stderr):\n\n"
-        print(e)
+            message += ret.stdout
-        sys.exit(1)
+        error = subprocess.SubprocessError(message)
-    if ret.returncode != 0:
+        error.ret = ret
        error.command = cmd_str
        raise error
    elif ret.returncode != 0:
        sys.exit(ret.returncode)
    return ret