mirror of
https://github.com/explosion/spaCy.git
synced 2025-07-10 16:22:29 +03:00
restore some functions that are used beyond projects
This commit is contained in:
parent
f9e39e1139
commit
4de46b8127
|
@ -19,7 +19,7 @@ def download_cli(
|
||||||
# fmt: off
|
# fmt: off
|
||||||
ctx: typer.Context,
|
ctx: typer.Context,
|
||||||
model: str = Arg(..., help="Name of pipeline package to download"),
|
model: str = Arg(..., help="Name of pipeline package to download"),
|
||||||
direct: bool = Opt(False, "--direct", "-d", "-D", help="Force direct download of name + version"),
|
direct: bool = Opt(False, "--direct", "-d", "-D", help="Force direct download of name + version"),
|
||||||
sdist: bool = Opt(False, "--sdist", "-S", help="Download sdist (.tar.gz) archive instead of pre-built binary wheel"),
|
sdist: bool = Opt(False, "--sdist", "-S", help="Download sdist (.tar.gz) archive instead of pre-built binary wheel"),
|
||||||
# fmt: on
|
# fmt: on
|
||||||
):
|
):
|
||||||
|
|
111
spacy/util.py
111
spacy/util.py
|
@ -1,6 +1,6 @@
|
||||||
from typing import List, Mapping, NoReturn, Union, Dict, Any, Set, cast
|
from typing import List, Mapping, NoReturn, Union, Dict, Any, Set, cast
|
||||||
from typing import Optional, Iterable, Callable, Tuple, Type
|
from typing import Optional, Iterable, Callable, Tuple, Type
|
||||||
from typing import Iterator, Pattern, TYPE_CHECKING
|
from typing import Iterator, Pattern, Generator, TYPE_CHECKING
|
||||||
from types import ModuleType
|
from types import ModuleType
|
||||||
import os
|
import os
|
||||||
import importlib
|
import importlib
|
||||||
|
@ -22,11 +22,17 @@ import warnings
|
||||||
from packaging.specifiers import SpecifierSet, InvalidSpecifier
|
from packaging.specifiers import SpecifierSet, InvalidSpecifier
|
||||||
from packaging.version import Version, InvalidVersion
|
from packaging.version import Version, InvalidVersion
|
||||||
from packaging.requirements import Requirement
|
from packaging.requirements import Requirement
|
||||||
|
import subprocess
|
||||||
|
from contextlib import contextmanager
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
import tempfile
|
||||||
|
import shutil
|
||||||
|
import shlex
|
||||||
import inspect
|
import inspect
|
||||||
import pkgutil
|
import pkgutil
|
||||||
import logging
|
import logging
|
||||||
import socket
|
import socket
|
||||||
|
import stat
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import cupy.random
|
import cupy.random
|
||||||
|
@ -41,7 +47,7 @@ from thinc.api import fix_random_seed, compounding, decaying # noqa: F401
|
||||||
|
|
||||||
|
|
||||||
from .symbols import ORTH
|
from .symbols import ORTH
|
||||||
from .compat import cupy, CudaStream, importlib_metadata
|
from .compat import cupy, CudaStream, is_windows, importlib_metadata
|
||||||
from .errors import Errors, Warnings, OLD_MODEL_SHORTCUTS
|
from .errors import Errors, Warnings, OLD_MODEL_SHORTCUTS
|
||||||
from . import about
|
from . import about
|
||||||
|
|
||||||
|
@ -942,6 +948,107 @@ def replace_model_node(model: Model, target: Model, replacement: Model) -> None:
|
||||||
node.set_ref(ref_name, replacement)
|
node.set_ref(ref_name, replacement)
|
||||||
|
|
||||||
|
|
||||||
|
def split_command(command: str) -> List[str]:
    """Tokenize a command string into an argument list via shlex.

    POSIX splitting rules are applied everywhere except on Windows, where
    shlex's non-POSIX mode is used instead.

    command (str): The command to split.
    RETURNS (List[str]): The split command.
    """
    use_posix_rules = not is_windows
    return shlex.split(command, posix=use_posix_rules)
|
||||||
|
|
||||||
|
|
||||||
|
def run_command(
    command: Union[str, List[str]],
    *,
    stdin: Optional[Any] = None,
    capture: bool = False,
) -> subprocess.CompletedProcess:
    """Execute a command as a subprocess and return the finished process.

    command (str / List[str]): The command. A string is tokenized with
        split_command before being executed; a list is executed as-is.
    stdin (Optional[Any]): Data passed to the subprocess as its input, or
        None.
    capture (bool): Whether to capture the output and errors. With
        capture=True, stdout and stderr are collected together on the
        returned object and a non-zero exit status raises a
        subprocess.SubprocessError carrying the process (`.ret`) and the
        command string (`.command`). With capture=False, the streams are not
        redirected and a non-zero exit status ends the program through
        sys.exit with that status. Use capture=False to hand execution over
        to the command, and capture=True to call it more like a function.
    RETURNS (CompletedProcess): The process object.
    """
    given_as_string = isinstance(command, str)
    cmd_list = split_command(command) if given_as_string else command
    cmd_str = command if given_as_string else " ".join(command)
    # Merge stderr into the captured stdout so a failure report contains one
    # combined log; leave both streams untouched when not capturing.
    out_target = subprocess.PIPE if capture else None
    err_target = subprocess.STDOUT if capture else None
    try:
        ret = subprocess.run(
            cmd_list,
            env=os.environ.copy(),
            input=stdin,
            encoding="utf8",
            check=False,
            stdout=out_target,
            stderr=err_target,
        )
    except FileNotFoundError:
        # The executable itself could not be found, i.e. the failure happened
        # before the command ever ran.
        raise FileNotFoundError(
            Errors.E970.format(str_command=cmd_str, tool=cmd_list[0])
        ) from None
    if ret.returncode == 0:
        return ret
    if not capture:
        # Nothing was captured, so simply propagate the exit status.
        sys.exit(ret.returncode)
    parts = [
        f"Error running command:\n\n{cmd_str}\n\n",
        f"Subprocess exited with status {ret.returncode}",
    ]
    if ret.stdout is not None:
        parts.append("\n\nProcess log (stdout and stderr):\n\n")
        parts.append(ret.stdout)
    error = subprocess.SubprocessError("".join(parts))
    error.ret = ret  # type: ignore[attr-defined]
    error.command = cmd_str  # type: ignore[attr-defined]
    raise error
|
||||||
|
|
||||||
|
|
||||||
|
@contextmanager
def working_dir(path: Union[str, Path]) -> Iterator[Path]:
    """Temporarily switch the process's current working directory.

    The previous working directory is restored when the block exits, whether
    it finishes normally or raises.

    path (str / Path): The directory to navigate to.
    YIELDS (Path): The resolved, absolute path that is now the working
        directory. Use it for actions inside the block to avoid mismatches
        with relative paths.
    """
    destination = Path(path).resolve()
    origin = Path.cwd()
    os.chdir(str(destination))
    try:
        yield destination
    finally:
        os.chdir(str(origin))
|
||||||
|
|
||||||
|
|
||||||
|
@contextmanager
def make_tempdir() -> Generator[Path, None, None]:
    """Execute a block in a temporary directory and remove the directory and
    its contents at the end of the with block.

    Cleanup runs in a `finally` clause, so the directory is also removed when
    the block raises; the original exception then propagates unchanged.

    YIELDS (Path): The path of the temp directory.
    """
    d = Path(tempfile.mkdtemp())

    # On Windows, git clones use read-only files, which cause permission errors
    # when being deleted. This forcibly fixes permissions.
    def force_remove(rmfunc, path, ex):
        os.chmod(path, stat.S_IWRITE)
        rmfunc(path)

    try:
        yield d
    finally:
        # Without this try/finally, an exception inside the with block would
        # skip the removal and leak the directory on disk.
        try:
            shutil.rmtree(str(d), onerror=force_remove)
        except PermissionError as e:
            # Best effort: if removal still fails, warn instead of raising.
            warnings.warn(Warnings.W091.format(dir=d, msg=e))
|
||||||
|
|
||||||
|
|
||||||
def is_in_jupyter() -> bool:
|
def is_in_jupyter() -> bool:
|
||||||
"""Check if user is running spaCy from a Jupyter notebook by detecting the
|
"""Check if user is running spaCy from a Jupyter notebook by detecting the
|
||||||
IPython kernel. Mainly used for the displaCy visualizer.
|
IPython kernel. Mainly used for the displaCy visualizer.
|
||||||
|
|
Loading…
Reference in New Issue
Block a user