From 80c276c5f71d22c42db4ee19d57cdba20d3bee68 Mon Sep 17 00:00:00 2001 From: Paul O'Leary McCann Date: Fri, 2 Sep 2022 13:13:40 +0900 Subject: [PATCH] Remove split_command and handle Windows commands The main change here is the preprocessing of commands run in projects, before being passed to the actual command running function. Previously commands were split and the first argument was checked, and if it was python or pip it was rewritten to use sys.executable. This change makes it so that preprocessing is not done on Windows, because there is no reliable way to split the string the same way the Windows interpreter would. It is possible to handle a limited set of commands in Windows, like literal "python" declarations, even without full shell parsing, but then quoting related to sys.executable has to be handled. --- spacy/cli/project/run.py | 40 ++++++++++++++++++++++++---------------- spacy/util.py | 14 ++------------ 2 files changed, 26 insertions(+), 28 deletions(-) diff --git a/spacy/cli/project/run.py b/spacy/cli/project/run.py index 734803bc4..4e836d6db 100644 --- a/spacy/cli/project/run.py +++ b/spacy/cli/project/run.py @@ -8,7 +8,8 @@ import typer from ... import about from ...git_info import GIT_VERSION -from ...util import working_dir, run_command, split_command, is_cwd, join_command +from ...compat import is_windows +from ...util import working_dir, run_command, is_cwd, join_command from ...util import SimpleFrozenList, is_minor_version_match, ENV_VARS from ...util import check_bool_env_var, SimpleFrozenDict from .._util import PROJECT_FILE, PROJECT_LOCK, load_project_config, get_hash @@ -157,7 +158,7 @@ def run_commands( commands (List[str]): The string commands. silent (bool): Don't print the commands. - dry (bool): Perform a dry run and don't execut anything. + dry (bool): Perform a dry run and don't execute anything. capture (bool): Whether to capture the output and errors of individual commands. If False, the stdout and stderr will not be redirected, and if there's an error, sys.exit will be called with the return code. You should use capture=False @@ -165,20 +166,27 @@ def run_commands( when you want to run the command more like a function. """ for c in commands: - command = split_command(c) - # Not sure if this is needed or a good idea. Motivation: users may often - # use commands in their config that reference "python" and we want to - # make sure that it's always executing the same Python that spaCy is - # executed with and the pip in the same env, not some other Python/pip. - # Also ensures cross-compatibility if user 1 writes "python3" (because - # that's how it's set up on their system), and user 2 without the - # shortcut tries to re-run the command. - if len(command) and command[0] in ("python", "python3"): - command[0] = sys.executable - elif len(command) and command[0] in ("pip", "pip3"): - command = [sys.executable, "-m", "pip", *command[1:]] - if not silent: - print(f"Running command: {join_command(command)}") + if is_windows: + # On Windows we don't rewrite the command because there's no + # reliable way to split and reassemble it + if not silent: + print(f"Running command: {c}") + else: + command = shlex.split(c, posix=True) + # Not sure if this is needed or a good idea. Motivation: users may often + # use commands in their config that reference "python" and we want to + # make sure that it's always executing the same Python that spaCy is + # executed with and the pip in the same env, not some other Python/pip. + # Also ensures cross-compatibility if user 1 writes "python3" (because + # that's how it's set up on their system), and user 2 without the + # shortcut tries to re-run the command. + if len(command) and command[0] in ("python", "python3"): + command[0] = sys.executable + elif len(command) and command[0] in ("pip", "pip3"): + command = [sys.executable, "-m", "pip", *command[1:]] + if not silent: + print(f"Running command: {join_command(command)}") + if not dry: run_command(command, capture=capture) diff --git a/spacy/util.py b/spacy/util.py index a2ec63295..767f13664 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -936,15 +936,6 @@ def replace_model_node(model: Model, target: Model, replacement: Model) -> None: node.set_ref(ref_name, replacement) -def split_command(command: str) -> List[str]: - """Split a string command using shlex. - - command (str) : The command to split - RETURNS (List[str]): The split command. - """ - return shlex.split(command, posix=True) - - def join_command(command: List[str]) -> str: """Join a command using shlex. shlex.join is only available for Python 3.8+, so we're using a workaround here. @@ -964,8 +955,7 @@ def run_command( """Run a command on the command line as a subprocess. If the subprocess returns a non-zero exit code, a system exit is performed. - command (str / List[str]): The command. If provided as a string, the - string will be split using shlex.split. + command (str / List[str]): The command to run. stdin (Optional[Any]): stdin to read from or None. capture (bool): Whether to capture the output and errors. If False, the stdout and stderr will not be redirected, and if there's an error, @@ -975,7 +965,7 @@ def run_command( RETURNS (Optional[CompletedProcess]): The process object. """ if isinstance(command, str): - cmd_list = split_command(command) + cmd_list = shlex.split(command, posix=True) cmd_str = command else: cmd_list = command