Remove split_command and handle Windows commands

The main change here is the preprocessing of commands run in projects, before being passed to the actual command running function. Previously commands were split and the first argument was checked, and if it was python or pip it was rewritten to use sys.executable. This change makes it so that preprocessing is not done on Windows, because there is no reliable way to split the string the same way the Windows interpreter would. It is possible to handle a limited set of commands in Windows, like literal "python" declarations, even without full shell parsing, but then quoting related to sys.executable has to be handled.
2025-08-07 21:54:54 +03:00 · 2022-09-02 13:13:40 +09:00 · 2022-09-02 13:13:40 +09:00 · 80c276c5f7
commit 80c276c5f7
parent 5058607896
2 changed files with 26 additions and 28 deletions
--- a/spacy/cli/project/run.py
+++ b/spacy/cli/project/run.py
@ -8,7 +8,8 @@ import typer

 from ... import about
 from ...git_info import GIT_VERSION
-from ...util import working_dir, run_command, split_command, is_cwd, join_command
+from ...compat import is_windows
+from ...util import working_dir, run_command, is_cwd, join_command
 from ...util import SimpleFrozenList, is_minor_version_match, ENV_VARS
 from ...util import check_bool_env_var, SimpleFrozenDict
 from .._util import PROJECT_FILE, PROJECT_LOCK, load_project_config, get_hash
@ -157,7 +158,7 @@ def run_commands(

    commands (List[str]): The string commands.
    silent (bool): Don't print the commands.
-    dry (bool): Perform a dry run and don't execut anything.
+    dry (bool): Perform a dry run and don't execute anything.
    capture (bool): Whether to capture the output and errors of individual commands.
        If False, the stdout and stderr will not be redirected, and if there's an error,
        sys.exit will be called with the return code. You should use capture=False
@ -165,7 +166,13 @@ def run_commands(
        when you want to run the command more like a function.
    """
    for c in commands:
-        command = split_command(c)
+        if is_windows:
+            # On Windows we don't rewrite the command because there's no
+            # reliable way to split and reassemble it
+            if not silent:
+                print(f"Running command: {c}")
+        else:
+            command = shlex.split(c, posix=True)
            # Not sure if this is needed or a good idea. Motivation: users may often
            # use commands in their config that reference "python" and we want to
            # make sure that it's always executing the same Python that spaCy is
@ -179,6 +186,7 @@ def run_commands(
                command = [sys.executable, "-m", "pip", *command[1:]]
            if not silent:
                print(f"Running command: {join_command(command)}")
+
        if not dry:
            run_command(command, capture=capture)

--- a/spacy/util.py
+++ b/spacy/util.py
@ -936,15 +936,6 @@ def replace_model_node(model: Model, target: Model, replacement: Model) -> None:
                node.set_ref(ref_name, replacement)


-def split_command(command: str) -> List[str]:
-    """Split a string command using shlex.
-
-    command (str) : The command to split
-    RETURNS (List[str]): The split command.
-    """
-    return shlex.split(command, posix=True)
-
-
 def join_command(command: List[str]) -> str:
    """Join a command using shlex. shlex.join is only available for Python 3.8+,
    so we're using a workaround here.
@ -964,8 +955,7 @@ def run_command(
    """Run a command on the command line as a subprocess. If the subprocess
    returns a non-zero exit code, a system exit is performed.

-    command (str / List[str]): The command. If provided as a string, the
-        string will be split using shlex.split.
+    command (str / List[str]): The command to run.
    stdin (Optional[Any]): stdin to read from or None.
    capture (bool): Whether to capture the output and errors. If False,
        the stdout and stderr will not be redirected, and if there's an error,
@ -975,7 +965,7 @@ def run_command(
    RETURNS (Optional[CompletedProcess]): The process object.
    """
    if isinstance(command, str):
-        cmd_list = split_command(command)
+        cmd_list = shlex.split(command, posix=True)
        cmd_str = command
    else:
        cmd_list = command