mirror of
https://github.com/explosion/spaCy.git
synced 2025-04-22 18:12:00 +03:00
remove project-specific functions in util.py
This commit is contained in:
parent
a7d8c06783
commit
fa4d587ce1
135
spacy/util.py
135
spacy/util.py
|
@ -1,6 +1,6 @@
|
|||
from typing import List, Mapping, NoReturn, Union, Dict, Any, Set, cast
|
||||
from typing import Optional, Iterable, Callable, Tuple, Type
|
||||
from typing import Iterator, Pattern, Generator, TYPE_CHECKING
|
||||
from typing import Iterator, Pattern, TYPE_CHECKING
|
||||
from types import ModuleType
|
||||
import os
|
||||
import importlib
|
||||
|
@ -22,17 +22,11 @@ import warnings
|
|||
from packaging.specifiers import SpecifierSet, InvalidSpecifier
|
||||
from packaging.version import Version, InvalidVersion
|
||||
from packaging.requirements import Requirement
|
||||
import subprocess
|
||||
from contextlib import contextmanager
|
||||
from collections import defaultdict
|
||||
import tempfile
|
||||
import shutil
|
||||
import shlex
|
||||
import inspect
|
||||
import pkgutil
|
||||
import logging
|
||||
import socket
|
||||
import stat
|
||||
|
||||
try:
|
||||
import cupy.random
|
||||
|
@ -47,7 +41,7 @@ from thinc.api import fix_random_seed, compounding, decaying # noqa: F401
|
|||
|
||||
|
||||
from .symbols import ORTH
|
||||
from .compat import cupy, CudaStream, is_windows, importlib_metadata
|
||||
from .compat import cupy, CudaStream, importlib_metadata
|
||||
from .errors import Errors, Warnings, OLD_MODEL_SHORTCUTS
|
||||
from . import about
|
||||
|
||||
|
@ -78,7 +72,6 @@ logger.addHandler(logger_stream_handler)
|
|||
|
||||
class ENV_VARS:
|
||||
CONFIG_OVERRIDES = "SPACY_CONFIG_OVERRIDES"
|
||||
PROJECT_USE_GIT_VERSION = "SPACY_PROJECT_USE_GIT_VERSION"
|
||||
|
||||
|
||||
class registry(thinc.registry):
|
||||
|
@ -949,130 +942,6 @@ def replace_model_node(model: Model, target: Model, replacement: Model) -> None:
|
|||
node.set_ref(ref_name, replacement)
|
||||
|
||||
|
||||
def split_command(command: str) -> List[str]:
|
||||
"""Split a string command using shlex. Handles platform compatibility.
|
||||
|
||||
command (str) : The command to split
|
||||
RETURNS (List[str]): The split command.
|
||||
"""
|
||||
return shlex.split(command, posix=not is_windows)
|
||||
|
||||
|
||||
def join_command(command: List[str]) -> str:
|
||||
"""Join a command using shlex. shlex.join is only available for Python 3.8+,
|
||||
so we're using a workaround here.
|
||||
|
||||
command (List[str]): The command to join.
|
||||
RETURNS (str): The joined command
|
||||
"""
|
||||
return " ".join(shlex.quote(cmd) for cmd in command)
|
||||
|
||||
|
||||
def run_command(
|
||||
command: Union[str, List[str]],
|
||||
*,
|
||||
stdin: Optional[Any] = None,
|
||||
capture: bool = False,
|
||||
) -> subprocess.CompletedProcess:
|
||||
"""Run a command on the command line as a subprocess. If the subprocess
|
||||
returns a non-zero exit code, a system exit is performed.
|
||||
|
||||
command (str / List[str]): The command. If provided as a string, the
|
||||
string will be split using shlex.split.
|
||||
stdin (Optional[Any]): stdin to read from or None.
|
||||
capture (bool): Whether to capture the output and errors. If False,
|
||||
the stdout and stderr will not be redirected, and if there's an error,
|
||||
sys.exit will be called with the return code. You should use capture=False
|
||||
when you want to turn over execution to the command, and capture=True
|
||||
when you want to run the command more like a function.
|
||||
RETURNS (Optional[CompletedProcess]): The process object.
|
||||
"""
|
||||
if isinstance(command, str):
|
||||
cmd_list = split_command(command)
|
||||
cmd_str = command
|
||||
else:
|
||||
cmd_list = command
|
||||
cmd_str = " ".join(command)
|
||||
try:
|
||||
ret = subprocess.run(
|
||||
cmd_list,
|
||||
env=os.environ.copy(),
|
||||
input=stdin,
|
||||
encoding="utf8",
|
||||
check=False,
|
||||
stdout=subprocess.PIPE if capture else None,
|
||||
stderr=subprocess.STDOUT if capture else None,
|
||||
)
|
||||
except FileNotFoundError:
|
||||
# Indicates the *command* wasn't found, it's an error before the command
|
||||
# is run.
|
||||
raise FileNotFoundError(
|
||||
Errors.E970.format(str_command=cmd_str, tool=cmd_list[0])
|
||||
) from None
|
||||
if ret.returncode != 0 and capture:
|
||||
message = f"Error running command:\n\n{cmd_str}\n\n"
|
||||
message += f"Subprocess exited with status {ret.returncode}"
|
||||
if ret.stdout is not None:
|
||||
message += f"\n\nProcess log (stdout and stderr):\n\n"
|
||||
message += ret.stdout
|
||||
error = subprocess.SubprocessError(message)
|
||||
error.ret = ret # type: ignore[attr-defined]
|
||||
error.command = cmd_str # type: ignore[attr-defined]
|
||||
raise error
|
||||
elif ret.returncode != 0:
|
||||
sys.exit(ret.returncode)
|
||||
return ret
|
||||
|
||||
|
||||
@contextmanager
|
||||
def working_dir(path: Union[str, Path]) -> Iterator[Path]:
|
||||
"""Change current working directory and returns to previous on exit.
|
||||
|
||||
path (str / Path): The directory to navigate to.
|
||||
YIELDS (Path): The absolute path to the current working directory. This
|
||||
should be used if the block needs to perform actions within the working
|
||||
directory, to prevent mismatches with relative paths.
|
||||
"""
|
||||
prev_cwd = Path.cwd()
|
||||
current = Path(path).resolve()
|
||||
os.chdir(str(current))
|
||||
try:
|
||||
yield current
|
||||
finally:
|
||||
os.chdir(str(prev_cwd))
|
||||
|
||||
|
||||
@contextmanager
|
||||
def make_tempdir() -> Generator[Path, None, None]:
|
||||
"""Execute a block in a temporary directory and remove the directory and
|
||||
its contents at the end of the with block.
|
||||
|
||||
YIELDS (Path): The path of the temp directory.
|
||||
"""
|
||||
d = Path(tempfile.mkdtemp())
|
||||
yield d
|
||||
|
||||
# On Windows, git clones use read-only files, which cause permission errors
|
||||
# when being deleted. This forcibly fixes permissions.
|
||||
def force_remove(rmfunc, path, ex):
|
||||
os.chmod(path, stat.S_IWRITE)
|
||||
rmfunc(path)
|
||||
|
||||
try:
|
||||
shutil.rmtree(str(d), onerror=force_remove)
|
||||
except PermissionError as e:
|
||||
warnings.warn(Warnings.W091.format(dir=d, msg=e))
|
||||
|
||||
|
||||
def is_cwd(path: Union[Path, str]) -> bool:
|
||||
"""Check whether a path is the current working directory.
|
||||
|
||||
path (Union[Path, str]): The directory path.
|
||||
RETURNS (bool): Whether the path is the current working directory.
|
||||
"""
|
||||
return str(Path(path).resolve()).lower() == str(Path.cwd().resolve()).lower()
|
||||
|
||||
|
||||
def is_in_jupyter() -> bool:
|
||||
"""Check if user is running spaCy from a Jupyter notebook by detecting the
|
||||
IPython kernel. Mainly used for the displaCy visualizer.
|
||||
|
|
Loading…
Reference in New Issue
Block a user