From fa4d587ce179840e087bd36a231ece9f7b9854d1 Mon Sep 17 00:00:00 2001 From: svlandeg Date: Wed, 8 Mar 2023 14:06:11 +0100 Subject: [PATCH] remove project-specific functions in util.py --- spacy/util.py | 135 +------------------------------------------------- 1 file changed, 2 insertions(+), 133 deletions(-) diff --git a/spacy/util.py b/spacy/util.py index 8cc89217d..1b7f96c45 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -1,6 +1,6 @@ from typing import List, Mapping, NoReturn, Union, Dict, Any, Set, cast from typing import Optional, Iterable, Callable, Tuple, Type -from typing import Iterator, Pattern, Generator, TYPE_CHECKING +from typing import Iterator, Pattern, TYPE_CHECKING from types import ModuleType import os import importlib @@ -22,17 +22,11 @@ import warnings from packaging.specifiers import SpecifierSet, InvalidSpecifier from packaging.version import Version, InvalidVersion from packaging.requirements import Requirement -import subprocess -from contextlib import contextmanager from collections import defaultdict -import tempfile -import shutil -import shlex import inspect import pkgutil import logging import socket -import stat try: import cupy.random @@ -47,7 +41,7 @@ from thinc.api import fix_random_seed, compounding, decaying # noqa: F401 from .symbols import ORTH -from .compat import cupy, CudaStream, is_windows, importlib_metadata +from .compat import cupy, CudaStream, importlib_metadata from .errors import Errors, Warnings, OLD_MODEL_SHORTCUTS from . import about @@ -78,7 +72,6 @@ logger.addHandler(logger_stream_handler) class ENV_VARS: CONFIG_OVERRIDES = "SPACY_CONFIG_OVERRIDES" - PROJECT_USE_GIT_VERSION = "SPACY_PROJECT_USE_GIT_VERSION" class registry(thinc.registry): @@ -949,130 +942,6 @@ def replace_model_node(model: Model, target: Model, replacement: Model) -> None: node.set_ref(ref_name, replacement) -def split_command(command: str) -> List[str]: - """Split a string command using shlex. Handles platform compatibility. - - command (str) : The command to split - RETURNS (List[str]): The split command. - """ - return shlex.split(command, posix=not is_windows) - - -def join_command(command: List[str]) -> str: - """Join a command using shlex. shlex.join is only available for Python 3.8+, - so we're using a workaround here. - - command (List[str]): The command to join. - RETURNS (str): The joined command - """ - return " ".join(shlex.quote(cmd) for cmd in command) - - -def run_command( - command: Union[str, List[str]], - *, - stdin: Optional[Any] = None, - capture: bool = False, -) -> subprocess.CompletedProcess: - """Run a command on the command line as a subprocess. If the subprocess - returns a non-zero exit code, a system exit is performed. - - command (str / List[str]): The command. If provided as a string, the - string will be split using shlex.split. - stdin (Optional[Any]): stdin to read from or None. - capture (bool): Whether to capture the output and errors. If False, - the stdout and stderr will not be redirected, and if there's an error, - sys.exit will be called with the return code. You should use capture=False - when you want to turn over execution to the command, and capture=True - when you want to run the command more like a function. - RETURNS (Optional[CompletedProcess]): The process object. - """ - if isinstance(command, str): - cmd_list = split_command(command) - cmd_str = command - else: - cmd_list = command - cmd_str = " ".join(command) - try: - ret = subprocess.run( - cmd_list, - env=os.environ.copy(), - input=stdin, - encoding="utf8", - check=False, - stdout=subprocess.PIPE if capture else None, - stderr=subprocess.STDOUT if capture else None, - ) - except FileNotFoundError: - # Indicates the *command* wasn't found, it's an error before the command - # is run. - raise FileNotFoundError( - Errors.E970.format(str_command=cmd_str, tool=cmd_list[0]) - ) from None - if ret.returncode != 0 and capture: - message = f"Error running command:\n\n{cmd_str}\n\n" - message += f"Subprocess exited with status {ret.returncode}" - if ret.stdout is not None: - message += f"\n\nProcess log (stdout and stderr):\n\n" - message += ret.stdout - error = subprocess.SubprocessError(message) - error.ret = ret # type: ignore[attr-defined] - error.command = cmd_str # type: ignore[attr-defined] - raise error - elif ret.returncode != 0: - sys.exit(ret.returncode) - return ret - - -@contextmanager -def working_dir(path: Union[str, Path]) -> Iterator[Path]: - """Change current working directory and returns to previous on exit. - - path (str / Path): The directory to navigate to. - YIELDS (Path): The absolute path to the current working directory. This - should be used if the block needs to perform actions within the working - directory, to prevent mismatches with relative paths. - """ - prev_cwd = Path.cwd() - current = Path(path).resolve() - os.chdir(str(current)) - try: - yield current - finally: - os.chdir(str(prev_cwd)) - - -@contextmanager -def make_tempdir() -> Generator[Path, None, None]: - """Execute a block in a temporary directory and remove the directory and - its contents at the end of the with block. - - YIELDS (Path): The path of the temp directory. - """ - d = Path(tempfile.mkdtemp()) - yield d - - # On Windows, git clones use read-only files, which cause permission errors - # when being deleted. This forcibly fixes permissions. - def force_remove(rmfunc, path, ex): - os.chmod(path, stat.S_IWRITE) - rmfunc(path) - - try: - shutil.rmtree(str(d), onerror=force_remove) - except PermissionError as e: - warnings.warn(Warnings.W091.format(dir=d, msg=e)) - - -def is_cwd(path: Union[Path, str]) -> bool: - """Check whether a path is the current working directory. - - path (Union[Path, str]): The directory path. - RETURNS (bool): Whether the path is the current working directory. - """ - return str(Path(path).resolve()).lower() == str(Path.cwd().resolve()).lower() - - def is_in_jupyter() -> bool: """Check if user is running spaCy from a Jupyter notebook by detecting the IPython kernel. Mainly used for the displaCy visualizer.