remove project-specific functions in util.py

This commit is contained in:
svlandeg 2023-03-08 14:06:11 +01:00
parent a7d8c06783
commit fa4d587ce1

View File

@ -1,6 +1,6 @@
from typing import List, Mapping, NoReturn, Union, Dict, Any, Set, cast
from typing import Optional, Iterable, Callable, Tuple, Type
from typing import Iterator, Pattern, Generator, TYPE_CHECKING
from typing import Iterator, Pattern, TYPE_CHECKING
from types import ModuleType
import os
import importlib
@ -22,17 +22,11 @@ import warnings
from packaging.specifiers import SpecifierSet, InvalidSpecifier
from packaging.version import Version, InvalidVersion
from packaging.requirements import Requirement
import subprocess
from contextlib import contextmanager
from collections import defaultdict
import tempfile
import shutil
import shlex
import inspect
import pkgutil
import logging
import socket
import stat
try:
import cupy.random
@ -47,7 +41,7 @@ from thinc.api import fix_random_seed, compounding, decaying # noqa: F401
from .symbols import ORTH
from .compat import cupy, CudaStream, is_windows, importlib_metadata
from .compat import cupy, CudaStream, importlib_metadata
from .errors import Errors, Warnings, OLD_MODEL_SHORTCUTS
from . import about
@ -78,7 +72,6 @@ logger.addHandler(logger_stream_handler)
class ENV_VARS:
CONFIG_OVERRIDES = "SPACY_CONFIG_OVERRIDES"
PROJECT_USE_GIT_VERSION = "SPACY_PROJECT_USE_GIT_VERSION"
class registry(thinc.registry):
@ -949,130 +942,6 @@ def replace_model_node(model: Model, target: Model, replacement: Model) -> None:
node.set_ref(ref_name, replacement)
def split_command(command: str) -> List[str]:
"""Split a string command using shlex. Handles platform compatibility.
command (str) : The command to split
RETURNS (List[str]): The split command.
"""
return shlex.split(command, posix=not is_windows)
def join_command(command: List[str]) -> str:
"""Join a command using shlex. shlex.join is only available for Python 3.8+,
so we're using a workaround here.
command (List[str]): The command to join.
RETURNS (str): The joined command
"""
return " ".join(shlex.quote(cmd) for cmd in command)
def run_command(
command: Union[str, List[str]],
*,
stdin: Optional[Any] = None,
capture: bool = False,
) -> subprocess.CompletedProcess:
"""Run a command on the command line as a subprocess. If the subprocess
returns a non-zero exit code, a system exit is performed.
command (str / List[str]): The command. If provided as a string, the
string will be split using shlex.split.
stdin (Optional[Any]): stdin to read from or None.
capture (bool): Whether to capture the output and errors. If False,
the stdout and stderr will not be redirected, and if there's an error,
sys.exit will be called with the return code. You should use capture=False
when you want to turn over execution to the command, and capture=True
when you want to run the command more like a function.
RETURNS (Optional[CompletedProcess]): The process object.
"""
if isinstance(command, str):
cmd_list = split_command(command)
cmd_str = command
else:
cmd_list = command
cmd_str = " ".join(command)
try:
ret = subprocess.run(
cmd_list,
env=os.environ.copy(),
input=stdin,
encoding="utf8",
check=False,
stdout=subprocess.PIPE if capture else None,
stderr=subprocess.STDOUT if capture else None,
)
except FileNotFoundError:
# Indicates the *command* wasn't found, it's an error before the command
# is run.
raise FileNotFoundError(
Errors.E970.format(str_command=cmd_str, tool=cmd_list[0])
) from None
if ret.returncode != 0 and capture:
message = f"Error running command:\n\n{cmd_str}\n\n"
message += f"Subprocess exited with status {ret.returncode}"
if ret.stdout is not None:
message += f"\n\nProcess log (stdout and stderr):\n\n"
message += ret.stdout
error = subprocess.SubprocessError(message)
error.ret = ret # type: ignore[attr-defined]
error.command = cmd_str # type: ignore[attr-defined]
raise error
elif ret.returncode != 0:
sys.exit(ret.returncode)
return ret
@contextmanager
def working_dir(path: Union[str, Path]) -> Iterator[Path]:
"""Change current working directory and returns to previous on exit.
path (str / Path): The directory to navigate to.
YIELDS (Path): The absolute path to the current working directory. This
should be used if the block needs to perform actions within the working
directory, to prevent mismatches with relative paths.
"""
prev_cwd = Path.cwd()
current = Path(path).resolve()
os.chdir(str(current))
try:
yield current
finally:
os.chdir(str(prev_cwd))
@contextmanager
def make_tempdir() -> Generator[Path, None, None]:
"""Execute a block in a temporary directory and remove the directory and
its contents at the end of the with block.
YIELDS (Path): The path of the temp directory.
"""
d = Path(tempfile.mkdtemp())
yield d
# On Windows, git clones use read-only files, which cause permission errors
# when being deleted. This forcibly fixes permissions.
def force_remove(rmfunc, path, ex):
os.chmod(path, stat.S_IWRITE)
rmfunc(path)
try:
shutil.rmtree(str(d), onerror=force_remove)
except PermissionError as e:
warnings.warn(Warnings.W091.format(dir=d, msg=e))
def is_cwd(path: Union[Path, str]) -> bool:
"""Check whether a path is the current working directory.
path (Union[Path, str]): The directory path.
RETURNS (bool): Whether the path is the current working directory.
"""
return str(Path(path).resolve()).lower() == str(Path.cwd().resolve()).lower()
def is_in_jupyter() -> bool:
"""Check if user is running spaCy from a Jupyter notebook by detecting the
IPython kernel. Mainly used for the displaCy visualizer.