From b29c0c608350081140deb456d9e0f56fd0537751 Mon Sep 17 00:00:00 2001 From: Paul O'Leary McCann Date: Thu, 8 Sep 2022 14:31:28 +0900 Subject: [PATCH] Clean up run_command This cleans up run_command to separate Windows and non-Windows argument prep. It still needs to be verified on Windows. One other change is to the structure of E970. E970 assumed that if a command was not found we would always have the command name, but that is not true on Windows if the input command is a string, which we can't split reliably. --- spacy/errors.py | 2 +- spacy/util.py | 53 ++++++++++++++++++++++++++++++++++--------------- 2 files changed, 38 insertions(+), 17 deletions(-) diff --git a/spacy/errors.py b/spacy/errors.py index 608305a06..36e04066d 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -812,7 +812,7 @@ class Errors(metaclass=ErrorsWithCodes): "assign it a name, e.g. `@Language.component('your_name')`. You can " "then run `nlp.replace_pipe('{name}', 'your_name')`.") E969 = ("Expected string values for field '{field}', but received {types} instead. ") - E970 = ("Can not execute command '{str_command}'. Do you have '{tool}' installed?") + E970 = ("Can not execute command '{str_command}'.{msg}") E971 = ("Found incompatible lengths in `Doc.from_array`: {array_length} for the " "array and {doc_length} for the Doc itself.") E972 = ("`Example.__init__` got None for '{arg}'. Requires Doc.") diff --git a/spacy/util.py b/spacy/util.py index 7e767bed6..14b600aa1 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -954,23 +954,36 @@ def run_command( when you want to run the command more like a function. RETURNS (Optional[CompletedProcess]): The process object. """ - if isinstance(command, str): - # On Windows, it's possible for the split here to be different from what - # will actually be used for execution, but it will *usually* be correct, - # and is only used in debugging output if a command failed. - cmd_list = shlex.split(command, posix=True) - cmd_str = command + + # The actual command to execute + cmd_to_run = None + # these are used for debugging + cmd_str = None + tool = None + + if is_windows: + # On Windows we can pass a list or string. In this case we'll just pass + # what we're given. + cmd_to_run = command + if isinstance(command, str): + cmd_str = command + else: # list + # TODO list2cmdline is an intentionally undocumented internal Python + # function, check if we want to use this. + cmd_str = subprocess.list2cmdline(command) + tool = command[0] else: - cmd_list = command - # As above, this will usually but not always be correct on Windows, but - # is only used for debugging purposes. - if hasattr(shlex, "join"): - cmd_str = shlex.join(command) # type: ignore - else: - # shlex.join is more correct, but is only available in 3.8+ - cmd_str = " ".join(command) + # on Posix systems we should pass a list of strings + if isinstance(command, str): + cmd_str = command + cmd_to_run = shlex.split(command, posix=True) + tool = cmd_to_run[0] + else: # list + cmd_to_run = command + tool = command[0] + # This is the same as shlex.join, added in Py 3.8 + cmd_str = " ".join(shlex.quote(arg) for arg in command) try: - cmd_to_run = command if is_windows else cmd_list ret = subprocess.run( cmd_to_run, env=os.environ.copy(), @@ -983,9 +996,17 @@ def run_command( except FileNotFoundError: # Indicates the *command* wasn't found, it's an error before the command # is run. + + if tool is None: + # On Windows we may be unable to get the executable name. + tool_check = "" + else: + tool_check = f" Do you have {tool} installed?" + raise FileNotFoundError( - Errors.E970.format(str_command=cmd_str, tool=cmd_list[0]) + Errors.E970.format(str_command=cmd_str, msg=tool_check) ) from None + if ret.returncode != 0 and capture: message = f"Error running command:\n\n{cmd_str}\n\n" message += f"Subprocess exited with status {ret.returncode}"