diff --git a/spacy/cli/download.py b/spacy/cli/download.py index 0c9a32b93..41d6bd886 100644 --- a/spacy/cli/download.py +++ b/spacy/cli/download.py @@ -19,7 +19,7 @@ def download_cli( # fmt: off ctx: typer.Context, model: str = Arg(..., help="Name of pipeline package to download"), - direct: bool = Opt(False, "--direct", "-d", "-D", help="Force direct download of name + version"), + direct: bool = Opt(False, "--direct", "-d", "-D", help="Force direct download PROJECT_of name + version"), sdist: bool = Opt(False, "--sdist", "-S", help="Download sdist (.tar.gz) archive instead of pre-built binary wheel"), # fmt: on ): diff --git a/spacy/util.py b/spacy/util.py index 1b7f96c45..73727ca62 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -1,6 +1,6 @@ from typing import List, Mapping, NoReturn, Union, Dict, Any, Set, cast from typing import Optional, Iterable, Callable, Tuple, Type -from typing import Iterator, Pattern, TYPE_CHECKING +from typing import Iterator, Pattern, Generator, TYPE_CHECKING from types import ModuleType import os import importlib @@ -22,11 +22,17 @@ import warnings from packaging.specifiers import SpecifierSet, InvalidSpecifier from packaging.version import Version, InvalidVersion from packaging.requirements import Requirement +import subprocess +from contextlib import contextmanager from collections import defaultdict +import tempfile +import shutil +import shlex import inspect import pkgutil import logging import socket +import stat try: import cupy.random @@ -41,7 +47,7 @@ from thinc.api import fix_random_seed, compounding, decaying # noqa: F401 from .symbols import ORTH -from .compat import cupy, CudaStream, importlib_metadata +from .compat import cupy, CudaStream, is_windows, importlib_metadata from .errors import Errors, Warnings, OLD_MODEL_SHORTCUTS from . 
def split_command(command: str) -> List[str]:
    """Split a string command using shlex. Handles platform compatibility.

    command (str): The command to split.
    RETURNS (List[str]): The split command.
    """
    # POSIX-mode shlex treats backslashes as escape characters, so non-POSIX
    # mode is selected whenever `is_windows` is set.
    return shlex.split(command, posix=not is_windows)


def run_command(
    command: Union[str, List[str]],
    *,
    stdin: Optional[Any] = None,
    capture: bool = False,
) -> subprocess.CompletedProcess:
    """Run a command on the command line as a subprocess.

    command (str / List[str]): The command. If provided as a string, the
        string will be split using split_command (shlex.split).
    stdin (Optional[Any]): Data forwarded to the subprocess as its standard
        input (passed to subprocess.run as `input`), or None.
    capture (bool): Whether to capture the output and errors. If False,
        the stdout and stderr will not be redirected, and if there's an error,
        sys.exit will be called with the return code. You should use
        capture=False when you want to turn over execution to the command,
        and capture=True when you want to run the command more like a
        function.
    RETURNS (CompletedProcess): The process object of the successfully
        completed command. With capture=True, its `stdout` holds the combined
        stdout and stderr as text.
    RAISES (FileNotFoundError): If the executable itself can't be found.
    RAISES (subprocess.SubprocessError): If capture=True and the command exits
        with a non-zero status. The error carries the CompletedProcess as
        `.ret` and the command string as `.command`.
    RAISES (SystemExit): If capture=False and the command exits with a
        non-zero status.
    """
    if isinstance(command, str):
        cmd_list = split_command(command)
        cmd_str = command
    else:
        cmd_list = command
        cmd_str = " ".join(command)
    try:
        ret = subprocess.run(
            cmd_list,
            env=os.environ.copy(),
            input=stdin,
            encoding="utf8",
            check=False,
            stdout=subprocess.PIPE if capture else None,
            # Merge stderr into stdout so the error message below can show a
            # single, correctly interleaved process log.
            stderr=subprocess.STDOUT if capture else None,
        )
    except FileNotFoundError:
        # Indicates the *command* wasn't found, it's an error before the
        # command is run.
        raise FileNotFoundError(
            Errors.E970.format(str_command=cmd_str, tool=cmd_list[0])
        ) from None
    if ret.returncode == 0:
        return ret
    if not capture:
        # Execution was handed over to the command: mirror its exit status.
        sys.exit(ret.returncode)
    message = f"Error running command:\n\n{cmd_str}\n\n"
    message += f"Subprocess exited with status {ret.returncode}"
    if ret.stdout is not None:
        message += "\n\nProcess log (stdout and stderr):\n\n"
        message += ret.stdout
    error = subprocess.SubprocessError(message)
    error.ret = ret  # type: ignore[attr-defined]
    error.command = cmd_str  # type: ignore[attr-defined]
    raise error


@contextmanager
def working_dir(path: Union[str, Path]) -> Iterator[Path]:
    """Change current working directory and returns to previous on exit.

    path (str / Path): The directory to navigate to.
    YIELDS (Path): The absolute path to the current working directory. This
        should be used if the block needs to perform actions within the
        working directory, to prevent mismatches with relative paths.
    """
    prev_cwd = Path.cwd()
    current = Path(path).resolve()
    os.chdir(str(current))
    try:
        yield current
    finally:
        # Always restore the previous directory, even if the block raised.
        os.chdir(str(prev_cwd))


@contextmanager
def make_tempdir() -> Generator[Path, None, None]:
    """Execute a block in a temporary directory and remove the directory and
    its contents at the end of the with block, including when the block
    raises an exception.

    YIELDS (Path): The path of the temp directory.
    """

    # On Windows, git clones use read-only files, which cause permission
    # errors when being deleted. This forcibly fixes permissions and retries
    # the operation that failed.
    def _force_remove(rmfunc, path, ex):
        os.chmod(path, stat.S_IWRITE)
        rmfunc(path)

    d = Path(tempfile.mkdtemp())
    try:
        yield d
    finally:
        # Clean up in `finally` so the directory doesn't leak when the body
        # of the with block raises. shutil.rmtree's `onerror` argument is
        # deprecated since Python 3.12 in favour of `onexc`; both call the
        # handler with three positional arguments.
        handler_arg = "onexc" if sys.version_info >= (3, 12) else "onerror"
        try:
            shutil.rmtree(str(d), **{handler_arg: _force_remove})
        except PermissionError as e:
            # Best effort: a directory that can't be deleted only warns.
            warnings.warn(Warnings.W091.format(dir=d, msg=e))