mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 17:24:41 +03:00
Use frozen list with custom errors
We don't want to break backwards compatibility too much but we also want to provide the best possible UX
This commit is contained in:
parent
6520d1a1df
commit
34146750d4
|
@ -27,8 +27,8 @@ if sys.maxunicode == 65535:
|
|||
|
||||
def load(
|
||||
name: Union[str, Path],
|
||||
disable: Iterable[str] = tuple(),
|
||||
exclude: Iterable[str] = tuple(),
|
||||
disable: Iterable[str] = util.SimpleFrozenList(),
|
||||
exclude: Iterable[str] = util.SimpleFrozenList(),
|
||||
config: Union[Dict[str, Any], Config] = util.SimpleFrozenDict(),
|
||||
) -> Language:
|
||||
"""Load a spaCy model from an installed package or a local path.
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
"""This module contains helpers and subcommands for integrating spaCy projects
|
||||
with Data Version Controk (DVC). https://dvc.org"""
|
||||
from typing import Dict, Any, List, Optional
|
||||
from typing import Dict, Any, List, Optional, Iterable
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from wasabi import msg
|
||||
|
@ -8,6 +8,7 @@ from wasabi import msg
|
|||
from .._util import PROJECT_FILE, load_project_config, get_hash, project_cli
|
||||
from .._util import Arg, Opt, NAME, COMMAND
|
||||
from ...util import working_dir, split_command, join_command, run_command
|
||||
from ...util import SimpleFrozenList
|
||||
|
||||
|
||||
DVC_CONFIG = "dvc.yaml"
|
||||
|
@ -130,7 +131,7 @@ def update_dvc_config(
|
|||
|
||||
|
||||
def run_dvc_commands(
|
||||
commands: List[str] = tuple(), flags: Dict[str, bool] = {},
|
||||
commands: Iterable[str] = SimpleFrozenList(), flags: Dict[str, bool] = {},
|
||||
) -> None:
|
||||
"""Run a sequence of DVC commands in a subprocess, in order.
|
||||
|
||||
|
|
|
@ -1,10 +1,11 @@
|
|||
from typing import Optional, List, Dict, Sequence, Any
|
||||
from typing import Optional, List, Dict, Sequence, Any, Iterable
|
||||
from pathlib import Path
|
||||
from wasabi import msg
|
||||
import sys
|
||||
import srsly
|
||||
|
||||
from ...util import working_dir, run_command, split_command, is_cwd, join_command
|
||||
from ...util import SimpleFrozenList
|
||||
from .._util import PROJECT_FILE, PROJECT_LOCK, load_project_config, get_hash
|
||||
from .._util import get_checksum, project_cli, Arg, Opt, COMMAND
|
||||
|
||||
|
@ -115,7 +116,9 @@ def print_run_help(project_dir: Path, subcommand: Optional[str] = None) -> None:
|
|||
|
||||
|
||||
def run_commands(
|
||||
commands: List[str] = tuple(), silent: bool = False, dry: bool = False,
|
||||
commands: Iterable[str] = SimpleFrozenList(),
|
||||
silent: bool = False,
|
||||
dry: bool = False,
|
||||
) -> None:
|
||||
"""Run a sequence of commands in a subprocess, in order.
|
||||
|
||||
|
|
|
@ -472,6 +472,13 @@ class Errors:
|
|||
E199 = ("Unable to merge 0-length span at doc[{start}:{end}].")
|
||||
|
||||
# TODO: fix numbering after merging develop into master
|
||||
E926 = ("It looks like you're trying to modify nlp.{attr} directly. This "
|
||||
"doesn't work because it's an immutable computed property. If you "
|
||||
"need to modify the pipeline, use the built-in methods like "
|
||||
"nlp.add_pipe, nlp.remove_pipe, nlp.disable_pipe or nlp.enable_pipe "
|
||||
"instead.")
|
||||
E927 = ("Can't write to frozen list Maybe you're trying to modify a computed "
|
||||
"property or default function argument?")
|
||||
E928 = ("A 'KnowledgeBase' should be written to / read from a file, but the "
|
||||
"provided argument {loc} is an existing directory.")
|
||||
E929 = ("A 'KnowledgeBase' could not be read from {loc} - the path does "
|
||||
|
|
|
@ -20,7 +20,7 @@ from .vocab import Vocab, create_vocab
|
|||
from .pipe_analysis import validate_attrs, analyze_pipes, print_pipe_analysis
|
||||
from .gold import Example, validate_examples
|
||||
from .scorer import Scorer
|
||||
from .util import create_default_optimizer, registry
|
||||
from .util import create_default_optimizer, registry, SimpleFrozenList
|
||||
from .util import SimpleFrozenDict, combine_score_weights, CONFIG_SECTION_ORDER
|
||||
from .lang.tokenizer_exceptions import URL_MATCH, BASE_EXCEPTIONS
|
||||
from .lang.punctuation import TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES
|
||||
|
@ -159,7 +159,7 @@ class Language:
|
|||
self.vocab: Vocab = vocab
|
||||
if self.lang is None:
|
||||
self.lang = self.vocab.lang
|
||||
self.components = []
|
||||
self._components = []
|
||||
self._disabled = set()
|
||||
self.max_length = max_length
|
||||
self.resolved = {}
|
||||
|
@ -207,11 +207,11 @@ class Language:
|
|||
"keys": self.vocab.vectors.n_keys,
|
||||
"name": self.vocab.vectors.name,
|
||||
}
|
||||
self._meta["labels"] = self.pipe_labels
|
||||
self._meta["labels"] = dict(self.pipe_labels)
|
||||
# TODO: Adding this back to prevent breaking people's code etc., but
|
||||
# we should consider removing it
|
||||
self._meta["pipeline"] = self.pipe_names
|
||||
self._meta["disabled"] = self.disabled
|
||||
self._meta["pipeline"] = list(self.pipe_names)
|
||||
self._meta["disabled"] = list(self.disabled)
|
||||
return self._meta
|
||||
|
||||
@meta.setter
|
||||
|
@ -240,8 +240,8 @@ class Language:
|
|||
pipeline[pipe_name] = {"factory": pipe_meta.factory, **pipe_config}
|
||||
if pipe_meta.default_score_weights:
|
||||
score_weights.append(pipe_meta.default_score_weights)
|
||||
self._config["nlp"]["pipeline"] = self.component_names
|
||||
self._config["nlp"]["disabled"] = self.disabled
|
||||
self._config["nlp"]["pipeline"] = list(self.component_names)
|
||||
self._config["nlp"]["disabled"] = list(self.disabled)
|
||||
self._config["components"] = pipeline
|
||||
self._config["training"]["score_weights"] = combine_score_weights(score_weights)
|
||||
if not srsly.is_json_serializable(self._config):
|
||||
|
@ -260,7 +260,8 @@ class Language:
|
|||
"""
|
||||
# Make sure the disabled components are returned in the order they
|
||||
# appear in the pipeline (which isn't guaranteed by the set)
|
||||
return [name for name, _ in self.components if name in self._disabled]
|
||||
names = [name for name, _ in self._components if name in self._disabled]
|
||||
return SimpleFrozenList(names, error=Errors.E926.format(attr="disabled"))
|
||||
|
||||
@property
|
||||
def factory_names(self) -> List[str]:
|
||||
|
@ -268,7 +269,17 @@ class Language:
|
|||
|
||||
RETURNS (List[str]): The factory names.
|
||||
"""
|
||||
return list(self.factories.keys())
|
||||
names = list(self.factories.keys())
|
||||
return SimpleFrozenList(names)
|
||||
|
||||
@property
|
||||
def components(self) -> List[Tuple[str, Callable[[Doc], Doc]]]:
|
||||
"""Get all (name, component) tuples in the pipeline, including the
|
||||
currently disabled components.
|
||||
"""
|
||||
return SimpleFrozenList(
|
||||
self._components, error=Errors.E926.format(attr="components")
|
||||
)
|
||||
|
||||
@property
|
||||
def component_names(self) -> List[str]:
|
||||
|
@ -277,7 +288,8 @@ class Language:
|
|||
|
||||
RETURNS (List[str]): List of component name strings, in order.
|
||||
"""
|
||||
return [pipe_name for pipe_name, _ in self.components]
|
||||
names = [pipe_name for pipe_name, _ in self._components]
|
||||
return SimpleFrozenList(names, error=Errors.E926.format(attr="component_names"))
|
||||
|
||||
@property
|
||||
def pipeline(self) -> List[Tuple[str, Callable[[Doc], Doc]]]:
|
||||
|
@ -287,7 +299,8 @@ class Language:
|
|||
|
||||
RETURNS (List[Tuple[str, Callable[[Doc], Doc]]]): The pipeline.
|
||||
"""
|
||||
return [(name, p) for name, p in self.components if name not in self._disabled]
|
||||
pipes = [(n, p) for n, p in self._components if n not in self._disabled]
|
||||
return SimpleFrozenList(pipes, error=Errors.E926.format(attr="pipeline"))
|
||||
|
||||
@property
|
||||
def pipe_names(self) -> List[str]:
|
||||
|
@ -295,7 +308,8 @@ class Language:
|
|||
|
||||
RETURNS (List[str]): List of component name strings, in order.
|
||||
"""
|
||||
return [pipe_name for pipe_name, _ in self.pipeline]
|
||||
names = [pipe_name for pipe_name, _ in self.pipeline]
|
||||
return SimpleFrozenList(names, error=Errors.E926.format(attr="pipe_names"))
|
||||
|
||||
@property
|
||||
def pipe_factories(self) -> Dict[str, str]:
|
||||
|
@ -304,9 +318,9 @@ class Language:
|
|||
RETURNS (Dict[str, str]): Factory names, keyed by component names.
|
||||
"""
|
||||
factories = {}
|
||||
for pipe_name, pipe in self.components:
|
||||
for pipe_name, pipe in self._components:
|
||||
factories[pipe_name] = self.get_pipe_meta(pipe_name).factory
|
||||
return factories
|
||||
return SimpleFrozenDict(factories)
|
||||
|
||||
@property
|
||||
def pipe_labels(self) -> Dict[str, List[str]]:
|
||||
|
@ -316,10 +330,10 @@ class Language:
|
|||
RETURNS (Dict[str, List[str]]): Labels keyed by component name.
|
||||
"""
|
||||
labels = {}
|
||||
for name, pipe in self.components:
|
||||
for name, pipe in self._components:
|
||||
if hasattr(pipe, "labels"):
|
||||
labels[name] = list(pipe.labels)
|
||||
return labels
|
||||
return SimpleFrozenDict(labels)
|
||||
|
||||
@classmethod
|
||||
def has_factory(cls, name: str) -> bool:
|
||||
|
@ -390,10 +404,10 @@ class Language:
|
|||
name: str,
|
||||
*,
|
||||
default_config: Dict[str, Any] = SimpleFrozenDict(),
|
||||
assigns: Iterable[str] = tuple(),
|
||||
requires: Iterable[str] = tuple(),
|
||||
assigns: Iterable[str] = SimpleFrozenList(),
|
||||
requires: Iterable[str] = SimpleFrozenList(),
|
||||
retokenizes: bool = False,
|
||||
scores: Iterable[str] = tuple(),
|
||||
scores: Iterable[str] = SimpleFrozenList(),
|
||||
default_score_weights: Dict[str, float] = SimpleFrozenDict(),
|
||||
func: Optional[Callable] = None,
|
||||
) -> Callable:
|
||||
|
@ -471,8 +485,8 @@ class Language:
|
|||
cls,
|
||||
name: Optional[str] = None,
|
||||
*,
|
||||
assigns: Iterable[str] = tuple(),
|
||||
requires: Iterable[str] = tuple(),
|
||||
assigns: Iterable[str] = SimpleFrozenList(),
|
||||
requires: Iterable[str] = SimpleFrozenList(),
|
||||
retokenizes: bool = False,
|
||||
func: Optional[Callable[[Doc], Doc]] = None,
|
||||
) -> Callable:
|
||||
|
@ -544,7 +558,7 @@ class Language:
|
|||
|
||||
DOCS: https://spacy.io/api/language#get_pipe
|
||||
"""
|
||||
for pipe_name, component in self.components:
|
||||
for pipe_name, component in self._components:
|
||||
if pipe_name == name:
|
||||
return component
|
||||
raise KeyError(Errors.E001.format(name=name, opts=self.component_names))
|
||||
|
@ -718,7 +732,7 @@ class Language:
|
|||
)
|
||||
pipe_index = self._get_pipe_index(before, after, first, last)
|
||||
self._pipe_meta[name] = self.get_factory_meta(factory_name)
|
||||
self.components.insert(pipe_index, (name, pipe_component))
|
||||
self._components.insert(pipe_index, (name, pipe_component))
|
||||
return pipe_component
|
||||
|
||||
def _get_pipe_index(
|
||||
|
@ -743,7 +757,7 @@ class Language:
|
|||
Errors.E006.format(args=all_args, opts=self.component_names)
|
||||
)
|
||||
if last or not any(value is not None for value in [first, before, after]):
|
||||
return len(self.components)
|
||||
return len(self._components)
|
||||
elif first:
|
||||
return 0
|
||||
elif isinstance(before, str):
|
||||
|
@ -761,14 +775,14 @@ class Language:
|
|||
# We're only accepting indices referring to components that exist
|
||||
# (can't just do isinstance here because bools are instance of int, too)
|
||||
elif type(before) == int:
|
||||
if before >= len(self.components) or before < 0:
|
||||
if before >= len(self._components) or before < 0:
|
||||
err = Errors.E959.format(
|
||||
dir="before", idx=before, opts=self.component_names
|
||||
)
|
||||
raise ValueError(err)
|
||||
return before
|
||||
elif type(after) == int:
|
||||
if after >= len(self.components) or after < 0:
|
||||
if after >= len(self._components) or after < 0:
|
||||
err = Errors.E959.format(
|
||||
dir="after", idx=after, opts=self.component_names
|
||||
)
|
||||
|
@ -815,7 +829,7 @@ class Language:
|
|||
# to Language.pipeline to make sure the configs are handled correctly
|
||||
pipe_index = self.pipe_names.index(name)
|
||||
self.remove_pipe(name)
|
||||
if not len(self.components) or pipe_index == len(self.components):
|
||||
if not len(self._components) or pipe_index == len(self._components):
|
||||
# we have no components to insert before/after, or we're replacing the last component
|
||||
self.add_pipe(factory_name, name=name, config=config, validate=validate)
|
||||
else:
|
||||
|
@ -844,7 +858,7 @@ class Language:
|
|||
Errors.E007.format(name=new_name, opts=self.component_names)
|
||||
)
|
||||
i = self.component_names.index(old_name)
|
||||
self.components[i] = (new_name, self.components[i][1])
|
||||
self._components[i] = (new_name, self._components[i][1])
|
||||
self._pipe_meta[new_name] = self._pipe_meta.pop(old_name)
|
||||
self._pipe_configs[new_name] = self._pipe_configs.pop(old_name)
|
||||
|
||||
|
@ -858,7 +872,7 @@ class Language:
|
|||
"""
|
||||
if name not in self.component_names:
|
||||
raise ValueError(Errors.E001.format(name=name, opts=self.component_names))
|
||||
removed = self.components.pop(self.component_names.index(name))
|
||||
removed = self._components.pop(self.component_names.index(name))
|
||||
# We're only removing the component itself from the metas/configs here
|
||||
# because factory may be used for something else
|
||||
self._pipe_meta.pop(name)
|
||||
|
@ -894,7 +908,7 @@ class Language:
|
|||
self,
|
||||
text: str,
|
||||
*,
|
||||
disable: Iterable[str] = tuple(),
|
||||
disable: Iterable[str] = SimpleFrozenList(),
|
||||
component_cfg: Optional[Dict[str, Dict[str, Any]]] = None,
|
||||
) -> Doc:
|
||||
"""Apply the pipeline to some text. The text can span multiple sentences,
|
||||
|
@ -993,7 +1007,7 @@ class Language:
|
|||
sgd: Optional[Optimizer] = None,
|
||||
losses: Optional[Dict[str, float]] = None,
|
||||
component_cfg: Optional[Dict[str, Dict[str, Any]]] = None,
|
||||
exclude: Iterable[str] = tuple(),
|
||||
exclude: Iterable[str] = SimpleFrozenList(),
|
||||
):
|
||||
"""Update the models in the pipeline.
|
||||
|
||||
|
@ -1047,7 +1061,7 @@ class Language:
|
|||
sgd: Optional[Optimizer] = None,
|
||||
losses: Optional[Dict[str, float]] = None,
|
||||
component_cfg: Optional[Dict[str, Dict[str, Any]]] = None,
|
||||
exclude: Iterable[str] = tuple(),
|
||||
exclude: Iterable[str] = SimpleFrozenList(),
|
||||
) -> Dict[str, float]:
|
||||
"""Make a "rehearsal" update to the models in the pipeline, to prevent
|
||||
forgetting. Rehearsal updates run an initial copy of the model over some
|
||||
|
@ -1276,7 +1290,7 @@ class Language:
|
|||
*,
|
||||
as_tuples: bool = False,
|
||||
batch_size: int = 1000,
|
||||
disable: Iterable[str] = tuple(),
|
||||
disable: Iterable[str] = SimpleFrozenList(),
|
||||
cleanup: bool = False,
|
||||
component_cfg: Optional[Dict[str, Dict[str, Any]]] = None,
|
||||
n_process: int = 1,
|
||||
|
@ -1436,8 +1450,8 @@ class Language:
|
|||
config: Union[Dict[str, Any], Config] = {},
|
||||
*,
|
||||
vocab: Union[Vocab, bool] = True,
|
||||
disable: Iterable[str] = tuple(),
|
||||
exclude: Iterable[str] = tuple(),
|
||||
disable: Iterable[str] = SimpleFrozenList(),
|
||||
exclude: Iterable[str] = SimpleFrozenList(),
|
||||
auto_fill: bool = True,
|
||||
validate: bool = True,
|
||||
) -> "Language":
|
||||
|
@ -1562,7 +1576,7 @@ class Language:
|
|||
return nlp
|
||||
|
||||
def to_disk(
|
||||
self, path: Union[str, Path], *, exclude: Iterable[str] = tuple()
|
||||
self, path: Union[str, Path], *, exclude: Iterable[str] = SimpleFrozenList()
|
||||
) -> None:
|
||||
"""Save the current state to a directory. If a model is loaded, this
|
||||
will include the model.
|
||||
|
@ -1580,7 +1594,7 @@ class Language:
|
|||
)
|
||||
serializers["meta.json"] = lambda p: srsly.write_json(p, self.meta)
|
||||
serializers["config.cfg"] = lambda p: self.config.to_disk(p)
|
||||
for name, proc in self.components:
|
||||
for name, proc in self._components:
|
||||
if name in exclude:
|
||||
continue
|
||||
if not hasattr(proc, "to_disk"):
|
||||
|
@ -1590,7 +1604,7 @@ class Language:
|
|||
util.to_disk(path, serializers, exclude)
|
||||
|
||||
def from_disk(
|
||||
self, path: Union[str, Path], *, exclude: Iterable[str] = tuple()
|
||||
self, path: Union[str, Path], *, exclude: Iterable[str] = SimpleFrozenList()
|
||||
) -> "Language":
|
||||
"""Loads state from a directory. Modifies the object in place and
|
||||
returns it. If the saved `Language` object contains a model, the
|
||||
|
@ -1624,7 +1638,7 @@ class Language:
|
|||
deserializers["tokenizer"] = lambda p: self.tokenizer.from_disk(
|
||||
p, exclude=["vocab"]
|
||||
)
|
||||
for name, proc in self.components:
|
||||
for name, proc in self._components:
|
||||
if name in exclude:
|
||||
continue
|
||||
if not hasattr(proc, "from_disk"):
|
||||
|
@ -1640,7 +1654,7 @@ class Language:
|
|||
self._link_components()
|
||||
return self
|
||||
|
||||
def to_bytes(self, *, exclude: Iterable[str] = tuple()) -> bytes:
|
||||
def to_bytes(self, *, exclude: Iterable[str] = SimpleFrozenList()) -> bytes:
|
||||
"""Serialize the current state to a binary string.
|
||||
|
||||
exclude (list): Names of components or serialization fields to exclude.
|
||||
|
@ -1653,7 +1667,7 @@ class Language:
|
|||
serializers["tokenizer"] = lambda: self.tokenizer.to_bytes(exclude=["vocab"])
|
||||
serializers["meta.json"] = lambda: srsly.json_dumps(self.meta)
|
||||
serializers["config.cfg"] = lambda: self.config.to_bytes()
|
||||
for name, proc in self.components:
|
||||
for name, proc in self._components:
|
||||
if name in exclude:
|
||||
continue
|
||||
if not hasattr(proc, "to_bytes"):
|
||||
|
@ -1662,7 +1676,7 @@ class Language:
|
|||
return util.to_bytes(serializers, exclude)
|
||||
|
||||
def from_bytes(
|
||||
self, bytes_data: bytes, *, exclude: Iterable[str] = tuple()
|
||||
self, bytes_data: bytes, *, exclude: Iterable[str] = SimpleFrozenList()
|
||||
) -> "Language":
|
||||
"""Load state from a binary string.
|
||||
|
||||
|
@ -1687,7 +1701,7 @@ class Language:
|
|||
deserializers["tokenizer"] = lambda b: self.tokenizer.from_bytes(
|
||||
b, exclude=["vocab"]
|
||||
)
|
||||
for name, proc in self.components:
|
||||
for name, proc in self._components:
|
||||
if name in exclude:
|
||||
continue
|
||||
if not hasattr(proc, "from_bytes"):
|
||||
|
|
|
@ -12,6 +12,7 @@ from ..symbols import IDS, TAG, POS, MORPH, LEMMA
|
|||
from ..tokens import Doc, Span
|
||||
from ..tokens._retokenize import normalize_token_attrs, set_token_attrs
|
||||
from ..vocab import Vocab
|
||||
from ..util import SimpleFrozenList
|
||||
from .. import util
|
||||
|
||||
|
||||
|
@ -220,7 +221,7 @@ class AttributeRuler(Pipe):
|
|||
results.update(Scorer.score_token_attr(examples, "lemma", **kwargs))
|
||||
return results
|
||||
|
||||
def to_bytes(self, exclude: Iterable[str] = tuple()) -> bytes:
|
||||
def to_bytes(self, exclude: Iterable[str] = SimpleFrozenList()) -> bytes:
|
||||
"""Serialize the AttributeRuler to a bytestring.
|
||||
|
||||
exclude (Iterable[str]): String names of serialization fields to exclude.
|
||||
|
@ -236,7 +237,9 @@ class AttributeRuler(Pipe):
|
|||
serialize["indices"] = lambda: srsly.msgpack_dumps(self.indices)
|
||||
return util.to_bytes(serialize, exclude)
|
||||
|
||||
def from_bytes(self, bytes_data: bytes, exclude: Iterable[str] = tuple()):
|
||||
def from_bytes(
|
||||
self, bytes_data: bytes, exclude: Iterable[str] = SimpleFrozenList()
|
||||
):
|
||||
"""Load the AttributeRuler from a bytestring.
|
||||
|
||||
bytes_data (bytes): The data to load.
|
||||
|
@ -272,7 +275,9 @@ class AttributeRuler(Pipe):
|
|||
|
||||
return self
|
||||
|
||||
def to_disk(self, path: Union[Path, str], exclude: Iterable[str] = tuple()) -> None:
|
||||
def to_disk(
|
||||
self, path: Union[Path, str], exclude: Iterable[str] = SimpleFrozenList()
|
||||
) -> None:
|
||||
"""Serialize the AttributeRuler to disk.
|
||||
|
||||
path (Union[Path, str]): A path to a directory.
|
||||
|
@ -289,7 +294,7 @@ class AttributeRuler(Pipe):
|
|||
util.to_disk(path, serialize, exclude)
|
||||
|
||||
def from_disk(
|
||||
self, path: Union[Path, str], exclude: Iterable[str] = tuple()
|
||||
self, path: Union[Path, str], exclude: Iterable[str] = SimpleFrozenList()
|
||||
) -> None:
|
||||
"""Load the AttributeRuler from disk.
|
||||
|
||||
|
|
|
@ -13,6 +13,7 @@ from ..language import Language
|
|||
from ..vocab import Vocab
|
||||
from ..gold import Example, validate_examples
|
||||
from ..errors import Errors, Warnings
|
||||
from ..util import SimpleFrozenList
|
||||
from .. import util
|
||||
|
||||
|
||||
|
@ -404,7 +405,7 @@ class EntityLinker(Pipe):
|
|||
token.ent_kb_id_ = kb_id
|
||||
|
||||
def to_disk(
|
||||
self, path: Union[str, Path], *, exclude: Iterable[str] = tuple()
|
||||
self, path: Union[str, Path], *, exclude: Iterable[str] = SimpleFrozenList(),
|
||||
) -> None:
|
||||
"""Serialize the pipe to disk.
|
||||
|
||||
|
@ -421,7 +422,7 @@ class EntityLinker(Pipe):
|
|||
util.to_disk(path, serialize, exclude)
|
||||
|
||||
def from_disk(
|
||||
self, path: Union[str, Path], *, exclude: Iterable[str] = tuple()
|
||||
self, path: Union[str, Path], *, exclude: Iterable[str] = SimpleFrozenList(),
|
||||
) -> "EntityLinker":
|
||||
"""Load the pipe from disk. Modifies the object in place and returns it.
|
||||
|
||||
|
|
|
@ -5,7 +5,7 @@ import srsly
|
|||
|
||||
from ..language import Language
|
||||
from ..errors import Errors
|
||||
from ..util import ensure_path, to_disk, from_disk
|
||||
from ..util import ensure_path, to_disk, from_disk, SimpleFrozenList
|
||||
from ..tokens import Doc, Span
|
||||
from ..matcher import Matcher, PhraseMatcher
|
||||
from ..scorer import Scorer
|
||||
|
@ -317,7 +317,7 @@ class EntityRuler:
|
|||
return Scorer.score_spans(examples, "ents", **kwargs)
|
||||
|
||||
def from_bytes(
|
||||
self, patterns_bytes: bytes, *, exclude: Iterable[str] = tuple()
|
||||
self, patterns_bytes: bytes, *, exclude: Iterable[str] = SimpleFrozenList()
|
||||
) -> "EntityRuler":
|
||||
"""Load the entity ruler from a bytestring.
|
||||
|
||||
|
@ -341,7 +341,7 @@ class EntityRuler:
|
|||
self.add_patterns(cfg)
|
||||
return self
|
||||
|
||||
def to_bytes(self, *, exclude: Iterable[str] = tuple()) -> bytes:
|
||||
def to_bytes(self, *, exclude: Iterable[str] = SimpleFrozenList()) -> bytes:
|
||||
"""Serialize the entity ruler patterns to a bytestring.
|
||||
|
||||
RETURNS (bytes): The serialized patterns.
|
||||
|
@ -357,7 +357,7 @@ class EntityRuler:
|
|||
return srsly.msgpack_dumps(serial)
|
||||
|
||||
def from_disk(
|
||||
self, path: Union[str, Path], *, exclude: Iterable[str] = tuple()
|
||||
self, path: Union[str, Path], *, exclude: Iterable[str] = SimpleFrozenList()
|
||||
) -> "EntityRuler":
|
||||
"""Load the entity ruler from a file. Expects a file containing
|
||||
newline-delimited JSON (JSONL) with one entry per line.
|
||||
|
@ -394,7 +394,7 @@ class EntityRuler:
|
|||
return self
|
||||
|
||||
def to_disk(
|
||||
self, path: Union[str, Path], *, exclude: Iterable[str] = tuple()
|
||||
self, path: Union[str, Path], *, exclude: Iterable[str] = SimpleFrozenList()
|
||||
) -> None:
|
||||
"""Save the entity ruler patterns to a directory. The patterns will be
|
||||
saved as newline-delimited JSON (JSONL).
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
from typing import Optional, Iterable, Dict, Any, Callable, Tuple, TYPE_CHECKING
|
||||
from typing import Optional, Iterable, Dict, Any, Callable, TYPE_CHECKING
|
||||
import numpy as np
|
||||
|
||||
from .gold import Example
|
||||
from .tokens import Token, Doc, Span
|
||||
from .errors import Errors
|
||||
from .util import get_lang_class
|
||||
from .util import get_lang_class, SimpleFrozenList
|
||||
from .morphology import Morphology
|
||||
|
||||
if TYPE_CHECKING:
|
||||
|
@ -317,7 +317,7 @@ class Scorer:
|
|||
attr: str,
|
||||
*,
|
||||
getter: Callable[[Doc, str], Any] = getattr,
|
||||
labels: Iterable[str] = tuple(),
|
||||
labels: Iterable[str] = SimpleFrozenList(),
|
||||
multi_label: bool = True,
|
||||
positive_label: Optional[str] = None,
|
||||
threshold: Optional[float] = None,
|
||||
|
@ -447,7 +447,7 @@ class Scorer:
|
|||
getter: Callable[[Token, str], Any] = getattr,
|
||||
head_attr: str = "head",
|
||||
head_getter: Callable[[Token, str], Token] = getattr,
|
||||
ignore_labels: Tuple[str] = tuple(),
|
||||
ignore_labels: Iterable[str] = SimpleFrozenList(),
|
||||
**cfg,
|
||||
) -> Dict[str, Any]:
|
||||
"""Returns the UAS, LAS, and LAS per type scores for dependency
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
import pytest
|
||||
from spacy.language import Language
|
||||
from spacy.util import SimpleFrozenList
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
|
@ -317,3 +318,31 @@ def test_disable_enable_pipes():
|
|||
assert nlp.config["nlp"]["disabled"] == [name]
|
||||
nlp("?")
|
||||
assert results[f"{name}1"] == "!"
|
||||
|
||||
|
||||
def test_pipe_methods_frozen():
|
||||
"""Test that spaCy raises custom error messages if "frozen" properties are
|
||||
accessed. We still want to use a list here to not break backwards
|
||||
compatibility, but users should see an error if they're trying to append
|
||||
to nlp.pipeline etc."""
|
||||
nlp = Language()
|
||||
ner = nlp.add_pipe("ner")
|
||||
assert nlp.pipe_names == ["ner"]
|
||||
for prop in [
|
||||
nlp.pipeline,
|
||||
nlp.pipe_names,
|
||||
nlp.components,
|
||||
nlp.component_names,
|
||||
nlp.disabled,
|
||||
nlp.factory_names,
|
||||
]:
|
||||
assert isinstance(prop, list)
|
||||
assert isinstance(prop, SimpleFrozenList)
|
||||
with pytest.raises(NotImplementedError):
|
||||
nlp.pipeline.append(("ner2", ner))
|
||||
with pytest.raises(NotImplementedError):
|
||||
nlp.pipe_names.pop()
|
||||
with pytest.raises(NotImplementedError):
|
||||
nlp.components.sort()
|
||||
with pytest.raises(NotImplementedError):
|
||||
nlp.component_names.clear()
|
||||
|
|
|
@ -3,10 +3,9 @@ import pytest
|
|||
from .util import get_random_doc
|
||||
|
||||
from spacy import util
|
||||
from spacy.util import dot_to_object
|
||||
from spacy.util import dot_to_object, SimpleFrozenList
|
||||
from thinc.api import Config, Optimizer
|
||||
from spacy.gold.batchers import minibatch_by_words
|
||||
|
||||
from ..lang.en import English
|
||||
from ..lang.nl import Dutch
|
||||
from ..language import DEFAULT_CONFIG_PATH
|
||||
|
@ -106,3 +105,20 @@ def test_util_dot_section():
|
|||
assert not dot_to_object(en_config, "nlp.load_vocab_data")
|
||||
assert dot_to_object(nl_config, "nlp.load_vocab_data")
|
||||
assert isinstance(dot_to_object(nl_config, "training.optimizer"), Optimizer)
|
||||
|
||||
|
||||
def test_simple_frozen_list():
|
||||
t = SimpleFrozenList(["foo", "bar"])
|
||||
assert t == ["foo", "bar"]
|
||||
assert t.index("bar") == 1 # okay method
|
||||
with pytest.raises(NotImplementedError):
|
||||
t.append("baz")
|
||||
with pytest.raises(NotImplementedError):
|
||||
t.sort()
|
||||
with pytest.raises(NotImplementedError):
|
||||
t.extend(["baz"])
|
||||
with pytest.raises(NotImplementedError):
|
||||
t.pop()
|
||||
t = SimpleFrozenList(["foo", "bar"], error="Error!")
|
||||
with pytest.raises(NotImplementedError):
|
||||
t.append("baz")
|
||||
|
|
|
@ -10,7 +10,7 @@ from ..vocab import Vocab
|
|||
from ..compat import copy_reg
|
||||
from ..attrs import SPACY, ORTH, intify_attr
|
||||
from ..errors import Errors
|
||||
from ..util import ensure_path
|
||||
from ..util import ensure_path, SimpleFrozenList
|
||||
|
||||
# fmt: off
|
||||
ALL_ATTRS = ("ORTH", "TAG", "HEAD", "DEP", "ENT_IOB", "ENT_TYPE", "ENT_KB_ID", "LEMMA", "MORPH", "POS")
|
||||
|
@ -52,7 +52,7 @@ class DocBin:
|
|||
self,
|
||||
attrs: Iterable[str] = ALL_ATTRS,
|
||||
store_user_data: bool = False,
|
||||
docs: Iterable[Doc] = tuple(),
|
||||
docs: Iterable[Doc] = SimpleFrozenList(),
|
||||
) -> None:
|
||||
"""Create a DocBin object to hold serialized annotations.
|
||||
|
||||
|
|
|
@ -120,6 +120,47 @@ class SimpleFrozenDict(dict):
|
|||
raise NotImplementedError(self.error)
|
||||
|
||||
|
||||
class SimpleFrozenList(list):
|
||||
"""Wrapper class around a list that lets us raise custom errors if certain
|
||||
attributes/methods are accessed. Mostly used for properties like
|
||||
Language.pipeline that return an immutable list (and that we don't want to
|
||||
convert to a tuple to not break too much backwards compatibility). If a user
|
||||
accidentally calls nlp.pipeline.append(), we can raise a more helpful error.
|
||||
"""
|
||||
|
||||
def __init__(self, *args, error: str = Errors.E927) -> None:
|
||||
"""Initialize the frozen list.
|
||||
|
||||
error (str): The error message when user tries to mutate the list.
|
||||
"""
|
||||
self.error = error
|
||||
super().__init__(*args)
|
||||
|
||||
def append(self, *args, **kwargs):
|
||||
raise NotImplementedError(self.error)
|
||||
|
||||
def clear(self, *args, **kwargs):
|
||||
raise NotImplementedError(self.error)
|
||||
|
||||
def extend(self, *args, **kwargs):
|
||||
raise NotImplementedError(self.error)
|
||||
|
||||
def insert(self, *args, **kwargs):
|
||||
raise NotImplementedError(self.error)
|
||||
|
||||
def pop(self, *args, **kwargs):
|
||||
raise NotImplementedError(self.error)
|
||||
|
||||
def remove(self, *args, **kwargs):
|
||||
raise NotImplementedError(self.error)
|
||||
|
||||
def reverse(self, *args, **kwargs):
|
||||
raise NotImplementedError(self.error)
|
||||
|
||||
def sort(self, *args, **kwargs):
|
||||
raise NotImplementedError(self.error)
|
||||
|
||||
|
||||
def lang_class_is_loaded(lang: str) -> bool:
|
||||
"""Check whether a Language class is already loaded. Language classes are
|
||||
loaded lazily, to avoid expensive setup code associated with the language
|
||||
|
@ -215,8 +256,8 @@ def load_model(
|
|||
name: Union[str, Path],
|
||||
*,
|
||||
vocab: Union["Vocab", bool] = True,
|
||||
disable: Iterable[str] = tuple(),
|
||||
exclude: Iterable[str] = tuple(),
|
||||
disable: Iterable[str] = SimpleFrozenList(),
|
||||
exclude: Iterable[str] = SimpleFrozenList(),
|
||||
config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
|
||||
) -> "Language":
|
||||
"""Load a model from a package or data path.
|
||||
|
@ -248,8 +289,8 @@ def load_model_from_package(
|
|||
name: str,
|
||||
*,
|
||||
vocab: Union["Vocab", bool] = True,
|
||||
disable: Iterable[str] = tuple(),
|
||||
exclude: Iterable[str] = tuple(),
|
||||
disable: Iterable[str] = SimpleFrozenList(),
|
||||
exclude: Iterable[str] = SimpleFrozenList(),
|
||||
config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
|
||||
) -> "Language":
|
||||
"""Load a model from an installed package.
|
||||
|
@ -275,8 +316,8 @@ def load_model_from_path(
|
|||
*,
|
||||
meta: Optional[Dict[str, Any]] = None,
|
||||
vocab: Union["Vocab", bool] = True,
|
||||
disable: Iterable[str] = tuple(),
|
||||
exclude: Iterable[str] = tuple(),
|
||||
disable: Iterable[str] = SimpleFrozenList(),
|
||||
exclude: Iterable[str] = SimpleFrozenList(),
|
||||
config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
|
||||
) -> "Language":
|
||||
"""Load a model from a data directory path. Creates Language class with
|
||||
|
@ -311,8 +352,8 @@ def load_model_from_config(
|
|||
config: Union[Dict[str, Any], Config],
|
||||
*,
|
||||
vocab: Union["Vocab", bool] = True,
|
||||
disable: Iterable[str] = tuple(),
|
||||
exclude: Iterable[str] = tuple(),
|
||||
disable: Iterable[str] = SimpleFrozenList(),
|
||||
exclude: Iterable[str] = SimpleFrozenList(),
|
||||
auto_fill: bool = False,
|
||||
validate: bool = True,
|
||||
) -> Tuple["Language", Config]:
|
||||
|
@ -355,8 +396,8 @@ def load_model_from_init_py(
|
|||
init_file: Union[Path, str],
|
||||
*,
|
||||
vocab: Union["Vocab", bool] = True,
|
||||
disable: Iterable[str] = tuple(),
|
||||
exclude: Iterable[str] = tuple(),
|
||||
disable: Iterable[str] = SimpleFrozenList(),
|
||||
exclude: Iterable[str] = SimpleFrozenList(),
|
||||
config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
|
||||
) -> "Language":
|
||||
"""Helper function to use in the `load()` method of a model package's
|
||||
|
|
Loading…
Reference in New Issue
Block a user