Remove split_command and handle Windows commands

The main change here is the preprocessing of commands run in projects,
before being passed to the actual command running function.

Previously commands were split and the first argument was checked, and
if it was python or pip it was rewritten to use sys.executable. This
change makes it so that preprocessing is not done on Windows, because
there is no reliable way to split the string the same way the Windows
interpreter would.

It is possible to handle a limited set of commands in Windows, like
literal "python" declarations, even without full shell parsing, but then
quoting related to sys.executable has to be handled.
This commit is contained in:
Paul O'Leary McCann 2022-09-02 13:13:40 +09:00
parent 5058607896
commit 80c276c5f7
2 changed files with 26 additions and 28 deletions

View File

@ -8,7 +8,8 @@ import typer
from ... import about
from ...git_info import GIT_VERSION
from ...util import working_dir, run_command, split_command, is_cwd, join_command
from ...compat import is_windows
from ...util import working_dir, run_command, is_cwd, join_command
from ...util import SimpleFrozenList, is_minor_version_match, ENV_VARS
from ...util import check_bool_env_var, SimpleFrozenDict
from .._util import PROJECT_FILE, PROJECT_LOCK, load_project_config, get_hash
@ -157,7 +158,7 @@ def run_commands(
commands (List[str]): The string commands.
silent (bool): Don't print the commands.
dry (bool): Perform a dry run and don't execut anything.
dry (bool): Perform a dry run and don't execute anything.
capture (bool): Whether to capture the output and errors of individual commands.
If False, the stdout and stderr will not be redirected, and if there's an error,
sys.exit will be called with the return code. You should use capture=False
@ -165,7 +166,13 @@ def run_commands(
when you want to run the command more like a function.
"""
for c in commands:
command = split_command(c)
if is_windows:
# On Windows we don't rewrite the command because there's no
# reliable way to split and reassemble it
if not silent:
print(f"Running command: {c}")
else:
command = shlex.split(c, posix=True)
# Not sure if this is needed or a good idea. Motivation: users may often
# use commands in their config that reference "python" and we want to
# make sure that it's always executing the same Python that spaCy is
@ -179,6 +186,7 @@ def run_commands(
command = [sys.executable, "-m", "pip", *command[1:]]
if not silent:
print(f"Running command: {join_command(command)}")
if not dry:
run_command(command, capture=capture)

View File

@ -936,15 +936,6 @@ def replace_model_node(model: Model, target: Model, replacement: Model) -> None:
node.set_ref(ref_name, replacement)
def split_command(command: str) -> List[str]:
"""Split a string command using shlex.
command (str) : The command to split
RETURNS (List[str]): The split command.
"""
return shlex.split(command, posix=True)
def join_command(command: List[str]) -> str:
"""Join a command using shlex. shlex.join is only available for Python 3.8+,
so we're using a workaround here.
@ -964,8 +955,7 @@ def run_command(
"""Run a command on the command line as a subprocess. If the subprocess
returns a non-zero exit code, a system exit is performed.
command (str / List[str]): The command. If provided as a string, the
string will be split using shlex.split.
command (str / List[str]): The command to run.
stdin (Optional[Any]): stdin to read from or None.
capture (bool): Whether to capture the output and errors. If False,
the stdout and stderr will not be redirected, and if there's an error,
@ -975,7 +965,7 @@ def run_command(
RETURNS (Optional[CompletedProcess]): The process object.
"""
if isinstance(command, str):
cmd_list = split_command(command)
cmd_list = shlex.split(command, posix=True)
cmd_str = command
else:
cmd_list = command