mirror of
https://github.com/explosion/spaCy.git
synced 2025-06-30 09:53:04 +03:00
remove project-specific functions in util.py
This commit is contained in:
parent
a7d8c06783
commit
fa4d587ce1
135
spacy/util.py
135
spacy/util.py
|
@ -1,6 +1,6 @@
|
||||||
from typing import List, Mapping, NoReturn, Union, Dict, Any, Set, cast
|
from typing import List, Mapping, NoReturn, Union, Dict, Any, Set, cast
|
||||||
from typing import Optional, Iterable, Callable, Tuple, Type
|
from typing import Optional, Iterable, Callable, Tuple, Type
|
||||||
from typing import Iterator, Pattern, Generator, TYPE_CHECKING
|
from typing import Iterator, Pattern, TYPE_CHECKING
|
||||||
from types import ModuleType
|
from types import ModuleType
|
||||||
import os
|
import os
|
||||||
import importlib
|
import importlib
|
||||||
|
@ -22,17 +22,11 @@ import warnings
|
||||||
from packaging.specifiers import SpecifierSet, InvalidSpecifier
|
from packaging.specifiers import SpecifierSet, InvalidSpecifier
|
||||||
from packaging.version import Version, InvalidVersion
|
from packaging.version import Version, InvalidVersion
|
||||||
from packaging.requirements import Requirement
|
from packaging.requirements import Requirement
|
||||||
import subprocess
|
|
||||||
from contextlib import contextmanager
|
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
import tempfile
|
|
||||||
import shutil
|
|
||||||
import shlex
|
|
||||||
import inspect
|
import inspect
|
||||||
import pkgutil
|
import pkgutil
|
||||||
import logging
|
import logging
|
||||||
import socket
|
import socket
|
||||||
import stat
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import cupy.random
|
import cupy.random
|
||||||
|
@ -47,7 +41,7 @@ from thinc.api import fix_random_seed, compounding, decaying # noqa: F401
|
||||||
|
|
||||||
|
|
||||||
from .symbols import ORTH
|
from .symbols import ORTH
|
||||||
from .compat import cupy, CudaStream, is_windows, importlib_metadata
|
from .compat import cupy, CudaStream, importlib_metadata
|
||||||
from .errors import Errors, Warnings, OLD_MODEL_SHORTCUTS
|
from .errors import Errors, Warnings, OLD_MODEL_SHORTCUTS
|
||||||
from . import about
|
from . import about
|
||||||
|
|
||||||
|
@ -78,7 +72,6 @@ logger.addHandler(logger_stream_handler)
|
||||||
|
|
||||||
class ENV_VARS:
|
class ENV_VARS:
|
||||||
CONFIG_OVERRIDES = "SPACY_CONFIG_OVERRIDES"
|
CONFIG_OVERRIDES = "SPACY_CONFIG_OVERRIDES"
|
||||||
PROJECT_USE_GIT_VERSION = "SPACY_PROJECT_USE_GIT_VERSION"
|
|
||||||
|
|
||||||
|
|
||||||
class registry(thinc.registry):
|
class registry(thinc.registry):
|
||||||
|
@ -949,130 +942,6 @@ def replace_model_node(model: Model, target: Model, replacement: Model) -> None:
|
||||||
node.set_ref(ref_name, replacement)
|
node.set_ref(ref_name, replacement)
|
||||||
|
|
||||||
|
|
||||||
def split_command(command: str) -> List[str]:
|
|
||||||
"""Split a string command using shlex. Handles platform compatibility.
|
|
||||||
|
|
||||||
command (str) : The command to split
|
|
||||||
RETURNS (List[str]): The split command.
|
|
||||||
"""
|
|
||||||
return shlex.split(command, posix=not is_windows)
|
|
||||||
|
|
||||||
|
|
||||||
def join_command(command: List[str]) -> str:
|
|
||||||
"""Join a command using shlex. shlex.join is only available for Python 3.8+,
|
|
||||||
so we're using a workaround here.
|
|
||||||
|
|
||||||
command (List[str]): The command to join.
|
|
||||||
RETURNS (str): The joined command
|
|
||||||
"""
|
|
||||||
return " ".join(shlex.quote(cmd) for cmd in command)
|
|
||||||
|
|
||||||
|
|
||||||
def run_command(
|
|
||||||
command: Union[str, List[str]],
|
|
||||||
*,
|
|
||||||
stdin: Optional[Any] = None,
|
|
||||||
capture: bool = False,
|
|
||||||
) -> subprocess.CompletedProcess:
|
|
||||||
"""Run a command on the command line as a subprocess. If the subprocess
|
|
||||||
returns a non-zero exit code, a system exit is performed.
|
|
||||||
|
|
||||||
command (str / List[str]): The command. If provided as a string, the
|
|
||||||
string will be split using shlex.split.
|
|
||||||
stdin (Optional[Any]): stdin to read from or None.
|
|
||||||
capture (bool): Whether to capture the output and errors. If False,
|
|
||||||
the stdout and stderr will not be redirected, and if there's an error,
|
|
||||||
sys.exit will be called with the return code. You should use capture=False
|
|
||||||
when you want to turn over execution to the command, and capture=True
|
|
||||||
when you want to run the command more like a function.
|
|
||||||
RETURNS (Optional[CompletedProcess]): The process object.
|
|
||||||
"""
|
|
||||||
if isinstance(command, str):
|
|
||||||
cmd_list = split_command(command)
|
|
||||||
cmd_str = command
|
|
||||||
else:
|
|
||||||
cmd_list = command
|
|
||||||
cmd_str = " ".join(command)
|
|
||||||
try:
|
|
||||||
ret = subprocess.run(
|
|
||||||
cmd_list,
|
|
||||||
env=os.environ.copy(),
|
|
||||||
input=stdin,
|
|
||||||
encoding="utf8",
|
|
||||||
check=False,
|
|
||||||
stdout=subprocess.PIPE if capture else None,
|
|
||||||
stderr=subprocess.STDOUT if capture else None,
|
|
||||||
)
|
|
||||||
except FileNotFoundError:
|
|
||||||
# Indicates the *command* wasn't found, it's an error before the command
|
|
||||||
# is run.
|
|
||||||
raise FileNotFoundError(
|
|
||||||
Errors.E970.format(str_command=cmd_str, tool=cmd_list[0])
|
|
||||||
) from None
|
|
||||||
if ret.returncode != 0 and capture:
|
|
||||||
message = f"Error running command:\n\n{cmd_str}\n\n"
|
|
||||||
message += f"Subprocess exited with status {ret.returncode}"
|
|
||||||
if ret.stdout is not None:
|
|
||||||
message += f"\n\nProcess log (stdout and stderr):\n\n"
|
|
||||||
message += ret.stdout
|
|
||||||
error = subprocess.SubprocessError(message)
|
|
||||||
error.ret = ret # type: ignore[attr-defined]
|
|
||||||
error.command = cmd_str # type: ignore[attr-defined]
|
|
||||||
raise error
|
|
||||||
elif ret.returncode != 0:
|
|
||||||
sys.exit(ret.returncode)
|
|
||||||
return ret
|
|
||||||
|
|
||||||
|
|
||||||
@contextmanager
|
|
||||||
def working_dir(path: Union[str, Path]) -> Iterator[Path]:
|
|
||||||
"""Change current working directory and returns to previous on exit.
|
|
||||||
|
|
||||||
path (str / Path): The directory to navigate to.
|
|
||||||
YIELDS (Path): The absolute path to the current working directory. This
|
|
||||||
should be used if the block needs to perform actions within the working
|
|
||||||
directory, to prevent mismatches with relative paths.
|
|
||||||
"""
|
|
||||||
prev_cwd = Path.cwd()
|
|
||||||
current = Path(path).resolve()
|
|
||||||
os.chdir(str(current))
|
|
||||||
try:
|
|
||||||
yield current
|
|
||||||
finally:
|
|
||||||
os.chdir(str(prev_cwd))
|
|
||||||
|
|
||||||
|
|
||||||
@contextmanager
|
|
||||||
def make_tempdir() -> Generator[Path, None, None]:
|
|
||||||
"""Execute a block in a temporary directory and remove the directory and
|
|
||||||
its contents at the end of the with block.
|
|
||||||
|
|
||||||
YIELDS (Path): The path of the temp directory.
|
|
||||||
"""
|
|
||||||
d = Path(tempfile.mkdtemp())
|
|
||||||
yield d
|
|
||||||
|
|
||||||
# On Windows, git clones use read-only files, which cause permission errors
|
|
||||||
# when being deleted. This forcibly fixes permissions.
|
|
||||||
def force_remove(rmfunc, path, ex):
|
|
||||||
os.chmod(path, stat.S_IWRITE)
|
|
||||||
rmfunc(path)
|
|
||||||
|
|
||||||
try:
|
|
||||||
shutil.rmtree(str(d), onerror=force_remove)
|
|
||||||
except PermissionError as e:
|
|
||||||
warnings.warn(Warnings.W091.format(dir=d, msg=e))
|
|
||||||
|
|
||||||
|
|
||||||
def is_cwd(path: Union[Path, str]) -> bool:
|
|
||||||
"""Check whether a path is the current working directory.
|
|
||||||
|
|
||||||
path (Union[Path, str]): The directory path.
|
|
||||||
RETURNS (bool): Whether the path is the current working directory.
|
|
||||||
"""
|
|
||||||
return str(Path(path).resolve()).lower() == str(Path.cwd().resolve()).lower()
|
|
||||||
|
|
||||||
|
|
||||||
def is_in_jupyter() -> bool:
|
def is_in_jupyter() -> bool:
|
||||||
"""Check if user is running spaCy from a Jupyter notebook by detecting the
|
"""Check if user is running spaCy from a Jupyter notebook by detecting the
|
||||||
IPython kernel. Mainly used for the displaCy visualizer.
|
IPython kernel. Mainly used for the displaCy visualizer.
|
||||||
|
|
Loading…
Reference in New Issue
Block a user