This commit is contained in:
Matthew Honnibal 2023-06-10 16:56:10 +02:00
parent 4332d12ce2
commit b9730a64cb
3 changed files with 39 additions and 32 deletions

View File

@ -801,8 +801,11 @@ class Language:
self._components.insert(pipe_index, (name, pipe_component))
return pipe_component
def add_pipe_instance(self, component: PipeCallable,
/, name: Optional[str] = None,
def add_pipe_instance(
self,
component: PipeCallable,
/,
name: Optional[str] = None,
*,
before: Optional[Union[str, int]] = None,
after: Optional[Union[str, int]] = None,
@ -1743,7 +1746,7 @@ class Language:
meta: Dict[str, Any] = SimpleFrozenDict(),
auto_fill: bool = True,
validate: bool = True,
pipe_instances: Dict[str, Any] = SimpleFrozenDict()
pipe_instances: Dict[str, Any] = SimpleFrozenDict(),
) -> "Language":
"""Create the nlp object from a loaded config. Will set up the tokenizer
and language data, add pipeline components etc. If no config is provided,
@ -1844,7 +1847,7 @@ class Language:
# and aren't built by factory.
missing_components = _find_missing_components(pipeline, pipe_instances, exclude)
if missing_components:
raise ValueError(Errors.E1052.format(", ",join(missing_components)))
# Report every missing component name in one comma-separated message.
raise ValueError(Errors.E1052.format(", ".join(missing_components)))
# If components are loaded from a source (existing models), we cache
# them here so they're only loaded once
source_nlps = {}
@ -1858,9 +1861,7 @@ class Language:
if pipe_name in exclude:
continue
else:
nlp.add_pipe_instance(
pipe_instances[pipe_name]
)
nlp.add_pipe_instance(pipe_instances[pipe_name])
# Is it important that we instantiate pipes that
# aren't excluded? It seems like we would want
# the exclude check above. I've left it how it
@ -2384,7 +2385,9 @@ class _Sender:
self.send()
def _get_instantiated_vocab(vocab: Union[bool, Vocab], pipe_instances: Dict[str, Any]) -> Union[bool, Vocab]:
def _get_instantiated_vocab(
vocab: Union[bool, Vocab], pipe_instances: Dict[str, Any]
) -> Union[bool, Vocab]:
vocab_instances = {}
for name, instance in pipe_instances.items():
if hasattr(instance, "vocab") and isinstance(instance.vocab, Vocab):
@ -2410,8 +2413,8 @@ def _get_instantiated_vocab(vocab: Union[bool, Vocab], pipe_instances: Dict[str,
def _find_missing_components(
pipeline: List[str],
pipe_instances: Dict[str, Any],
exclude: List[str]
pipeline: List[str], pipe_instances: Dict[str, Any], exclude: List[str]
) -> List[str]:
return [name for name in pipeline if name not in pipe_instances and name not in exclude]
return [
name for name in pipeline if name not in pipe_instances and name not in exclude
]

View File

@ -801,15 +801,18 @@ def test_component_return():
nlp("text")
@pytest.mark.parametrize("components,kwargs,position", [
(["t1", "t2"], {"before": "t1"}, 0),
(["t1", "t2"], {"after": "t1"}, 1),
(["t1", "t2"], {"after": "t1"}, 1),
(["t1", "t2"], {"first": True}, 0),
(["t1", "t2"], {"last": True}, 2),
(["t1", "t2"], {"last": False}, 2),
(["t1", "t2"], {"first": False}, ValueError),
])
@pytest.mark.parametrize(
"components,kwargs,position",
[
(["t1", "t2"], {"before": "t1"}, 0),
(["t1", "t2"], {"after": "t1"}, 1),
(["t1", "t2"], {"after": "t1"}, 1),
(["t1", "t2"], {"first": True}, 0),
(["t1", "t2"], {"last": True}, 2),
(["t1", "t2"], {"last": False}, 2),
(["t1", "t2"], {"first": False}, ValueError),
],
)
def test_add_pipe_instance(components, kwargs, position):
nlp = Language()
for name in components:
@ -822,7 +825,9 @@ def test_add_pipe_instance(components, kwargs, position):
assert nlp.pipe_names == pipe_names
else:
with pytest.raises(ValueError):
result = nlp.add_pipe_instance(evil_component, name="new_component", **kwargs)
result = nlp.add_pipe_instance(
evil_component, name="new_component", **kwargs
)
def test_add_pipe_instance_to_bytes():
@ -831,4 +836,3 @@ def test_add_pipe_instance_to_bytes():
nlp.add_pipe("textcat", name="t2")
nlp.add_pipe_instance(evil_component, name="new_component")
b = nlp.to_bytes()

View File

@ -415,7 +415,7 @@ def load_model(
enable: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES,
exclude: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES,
config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
pipe_instances: Dict[str, Any] = SimpleFrozenDict()
pipe_instances: Dict[str, Any] = SimpleFrozenDict(),
) -> "Language":
"""Load a model from a package or data path.
@ -427,7 +427,7 @@ def load_model(
exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude.
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
keyed by section values in dot notation.
pipe_instances (Dict[str, Any]): Dictionary of components
pipe_instances (Dict[str, Any]): Dictionary of components
to be added to the pipeline directly (not created from
config)
RETURNS (Language): The loaded nlp object.
@ -438,7 +438,7 @@ def load_model(
"enable": enable,
"exclude": exclude,
"config": config,
"pipe_instances": pipe_instances
"pipe_instances": pipe_instances,
}
if isinstance(name, str): # name or string path
if name.startswith("blank:"): # shortcut for blank model
@ -462,7 +462,7 @@ def load_model_from_package(
enable: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES,
exclude: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES,
config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
pipe_instances: Dict[str, Any] = SimpleFrozenDict()
pipe_instances: Dict[str, Any] = SimpleFrozenDict(),
) -> "Language":
"""Load a model from an installed package.
@ -478,7 +478,7 @@ def load_model_from_package(
components won't be loaded.
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
keyed by section values in dot notation.
pipe_instances (Dict[str, Any]): Dictionary of components
pipe_instances (Dict[str, Any]): Dictionary of components
to be added to the pipeline directly (not created from
config)
RETURNS (Language): The loaded nlp object.
@ -496,7 +496,7 @@ def load_model_from_path(
enable: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES,
exclude: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES,
config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
pipe_instances: Dict[str, Any] = SimpleFrozenDict()
pipe_instances: Dict[str, Any] = SimpleFrozenDict(),
) -> "Language":
"""Load a model from a data directory path. Creates Language class with
pipeline from config.cfg and then calls from_disk() with path.
@ -533,7 +533,7 @@ def load_model_from_path(
enable=enable,
exclude=exclude,
meta=meta,
pipe_instances=pipe_instances
pipe_instances=pipe_instances,
)
return nlp.from_disk(model_path, exclude=exclude, overrides=overrides)
@ -548,7 +548,7 @@ def load_model_from_config(
exclude: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES,
auto_fill: bool = False,
validate: bool = True,
pipe_instances: Dict[str, Any] = SimpleFrozenDict()
pipe_instances: Dict[str, Any] = SimpleFrozenDict(),
) -> "Language":
"""Create an nlp object from a config. Expects the full config file including
a section "nlp" containing the settings for the nlp object.
@ -588,7 +588,7 @@ def load_model_from_config(
auto_fill=auto_fill,
validate=validate,
meta=meta,
pipe_instances=pipe_instances
pipe_instances=pipe_instances,
)
return nlp