Remove split_command and handle Windows commands

The main change here is the preprocessing of commands run in projects,
before being passed to the actual command running function.

Previously commands were split and the first argument was checked, and
if it was python or pip it was rewritten to use sys.executable. This
change makes it so that preprocessing is not done on Windows, because
there is no reliable way to split the string the same way the Windows
interpreter would.

It is possible to handle a limited set of commands in Windows, like
literal "python" declarations, even without full shell parsing, but then
quoting related to sys.executable has to be handled.
This commit is contained in:
Paul O'Leary McCann 2022-09-02 13:13:40 +09:00
parent 5058607896
commit 80c276c5f7
2 changed files with 26 additions and 28 deletions

View File

@ -8,7 +8,8 @@ import typer
from ... import about from ... import about
from ...git_info import GIT_VERSION from ...git_info import GIT_VERSION
from ...util import working_dir, run_command, split_command, is_cwd, join_command from ...compat import is_windows
from ...util import working_dir, run_command, is_cwd, join_command
from ...util import SimpleFrozenList, is_minor_version_match, ENV_VARS from ...util import SimpleFrozenList, is_minor_version_match, ENV_VARS
from ...util import check_bool_env_var, SimpleFrozenDict from ...util import check_bool_env_var, SimpleFrozenDict
from .._util import PROJECT_FILE, PROJECT_LOCK, load_project_config, get_hash from .._util import PROJECT_FILE, PROJECT_LOCK, load_project_config, get_hash
@ -157,7 +158,7 @@ def run_commands(
commands (List[str]): The string commands. commands (List[str]): The string commands.
silent (bool): Don't print the commands. silent (bool): Don't print the commands.
dry (bool): Perform a dry run and don't execut anything. dry (bool): Perform a dry run and don't execute anything.
capture (bool): Whether to capture the output and errors of individual commands. capture (bool): Whether to capture the output and errors of individual commands.
If False, the stdout and stderr will not be redirected, and if there's an error, If False, the stdout and stderr will not be redirected, and if there's an error,
sys.exit will be called with the return code. You should use capture=False sys.exit will be called with the return code. You should use capture=False
@ -165,20 +166,27 @@ def run_commands(
when you want to run the command more like a function. when you want to run the command more like a function.
""" """
for c in commands: for c in commands:
command = split_command(c) if is_windows:
# Not sure if this is needed or a good idea. Motivation: users may often # On Windows we don't rewrite the command because there's no
# use commands in their config that reference "python" and we want to # reliable way to split and reassemble it
# make sure that it's always executing the same Python that spaCy is if not silent:
# executed with and the pip in the same env, not some other Python/pip. print(f"Running command: {c}")
# Also ensures cross-compatibility if user 1 writes "python3" (because else:
# that's how it's set up on their system), and user 2 without the command = shlex.split(c, posix=True)
# shortcut tries to re-run the command. # Not sure if this is needed or a good idea. Motivation: users may often
if len(command) and command[0] in ("python", "python3"): # use commands in their config that reference "python" and we want to
command[0] = sys.executable # make sure that it's always executing the same Python that spaCy is
elif len(command) and command[0] in ("pip", "pip3"): # executed with and the pip in the same env, not some other Python/pip.
command = [sys.executable, "-m", "pip", *command[1:]] # Also ensures cross-compatibility if user 1 writes "python3" (because
if not silent: # that's how it's set up on their system), and user 2 without the
print(f"Running command: {join_command(command)}") # shortcut tries to re-run the command.
if len(command) and command[0] in ("python", "python3"):
command[0] = sys.executable
elif len(command) and command[0] in ("pip", "pip3"):
command = [sys.executable, "-m", "pip", *command[1:]]
if not silent:
print(f"Running command: {join_command(command)}")
if not dry: if not dry:
run_command(command, capture=capture) run_command(command, capture=capture)

View File

@ -936,15 +936,6 @@ def replace_model_node(model: Model, target: Model, replacement: Model) -> None:
node.set_ref(ref_name, replacement) node.set_ref(ref_name, replacement)
def split_command(command: str) -> List[str]:
"""Split a string command using shlex.
command (str) : The command to split
RETURNS (List[str]): The split command.
"""
return shlex.split(command, posix=True)
def join_command(command: List[str]) -> str: def join_command(command: List[str]) -> str:
"""Join a command using shlex. shlex.join is only available for Python 3.8+, """Join a command using shlex. shlex.join is only available for Python 3.8+,
so we're using a workaround here. so we're using a workaround here.
@ -964,8 +955,7 @@ def run_command(
"""Run a command on the command line as a subprocess. If the subprocess """Run a command on the command line as a subprocess. If the subprocess
returns a non-zero exit code, a system exit is performed. returns a non-zero exit code, a system exit is performed.
command (str / List[str]): The command. If provided as a string, the command (str / List[str]): The command to run.
string will be split using shlex.split.
stdin (Optional[Any]): stdin to read from or None. stdin (Optional[Any]): stdin to read from or None.
capture (bool): Whether to capture the output and errors. If False, capture (bool): Whether to capture the output and errors. If False,
the stdout and stderr will not be redirected, and if there's an error, the stdout and stderr will not be redirected, and if there's an error,
@ -975,7 +965,7 @@ def run_command(
RETURNS (Optional[CompletedProcess]): The process object. RETURNS (Optional[CompletedProcess]): The process object.
""" """
if isinstance(command, str): if isinstance(command, str):
cmd_list = split_command(command) cmd_list = shlex.split(command, posix=True)
cmd_str = command cmd_str = command
else: else:
cmd_list = command cmd_list = command