This commit is contained in:
Matthew Honnibal 2023-06-10 16:56:10 +02:00
parent 4332d12ce2
commit b9730a64cb
3 changed files with 39 additions and 32 deletions

View File

@ -801,8 +801,11 @@ class Language:
self._components.insert(pipe_index, (name, pipe_component)) self._components.insert(pipe_index, (name, pipe_component))
return pipe_component return pipe_component
def add_pipe_instance(self, component: PipeCallable, def add_pipe_instance(
/, name: Optional[str] = None, self,
component: PipeCallable,
/,
name: Optional[str] = None,
*, *,
before: Optional[Union[str, int]] = None, before: Optional[Union[str, int]] = None,
after: Optional[Union[str, int]] = None, after: Optional[Union[str, int]] = None,
@ -1743,7 +1746,7 @@ class Language:
meta: Dict[str, Any] = SimpleFrozenDict(), meta: Dict[str, Any] = SimpleFrozenDict(),
auto_fill: bool = True, auto_fill: bool = True,
validate: bool = True, validate: bool = True,
pipe_instances: Dict[str, Any] = SimpleFrozenDict() pipe_instances: Dict[str, Any] = SimpleFrozenDict(),
) -> "Language": ) -> "Language":
"""Create the nlp object from a loaded config. Will set up the tokenizer """Create the nlp object from a loaded config. Will set up the tokenizer
and language data, add pipeline components etc. If no config is provided, and language data, add pipeline components etc. If no config is provided,
@ -1844,7 +1847,7 @@ class Language:
# and aren't built by factory. # and aren't built by factory.
missing_components = _find_missing_components(pipeline, pipe_instances, exclude) missing_components = _find_missing_components(pipeline, pipe_instances, exclude)
if missing_components: if missing_components:
raise ValueError(Errors.E1052.format(", ",join(missing_components))) raise ValueError(Errors.E1052.format(", ".join(missing_components)))
# If components are loaded from a source (existing models), we cache # If components are loaded from a source (existing models), we cache
# them here so they're only loaded once # them here so they're only loaded once
source_nlps = {} source_nlps = {}
@ -1858,9 +1861,7 @@ class Language:
if pipe_name in exclude: if pipe_name in exclude:
continue continue
else: else:
nlp.add_pipe_instance( nlp.add_pipe_instance(pipe_instances[pipe_name])
pipe_instances[pipe_name]
)
# Is it important that we instantiate pipes that # Is it important that we instantiate pipes that
# aren't excluded? It seems like we would want # aren't excluded? It seems like we would want
# the exclude check above. I've left it how it # the exclude check above. I've left it how it
@ -2384,7 +2385,9 @@ class _Sender:
self.send() self.send()
def _get_instantiated_vocab(vocab: Union[bool, Vocab], pipe_instances: Dict[str, Any]) -> Union[bool, Vocab]: def _get_instantiated_vocab(
vocab: Union[bool, Vocab], pipe_instances: Dict[str, Any]
) -> Union[bool, Vocab]:
vocab_instances = {} vocab_instances = {}
for name, instance in pipe_instances.items(): for name, instance in pipe_instances.items():
if hasattr(instance, "vocab") and isinstance(instance.vocab, Vocab): if hasattr(instance, "vocab") and isinstance(instance.vocab, Vocab):
@ -2410,8 +2413,8 @@ def _get_instantiated_vocab(vocab: Union[bool, Vocab], pipe_instances: Dict[str,
def _find_missing_components( def _find_missing_components(
pipeline: List[str], pipeline: List[str], pipe_instances: Dict[str, Any], exclude: List[str]
pipe_instances: Dict[str, Any],
exclude: List[str]
) -> List[str]: ) -> List[str]:
return [name for name in pipeline if name not in pipe_instances and name not in exclude] return [
name for name in pipeline if name not in pipe_instances and name not in exclude
]

View File

@ -801,15 +801,18 @@ def test_component_return():
nlp("text") nlp("text")
@pytest.mark.parametrize("components,kwargs,position", [ @pytest.mark.parametrize(
(["t1", "t2"], {"before": "t1"}, 0), "components,kwargs,position",
(["t1", "t2"], {"after": "t1"}, 1), [
(["t1", "t2"], {"after": "t1"}, 1), (["t1", "t2"], {"before": "t1"}, 0),
(["t1", "t2"], {"first": True}, 0), (["t1", "t2"], {"after": "t1"}, 1),
(["t1", "t2"], {"last": True}, 2), (["t1", "t2"], {"after": "t1"}, 1),
(["t1", "t2"], {"last": False}, 2), (["t1", "t2"], {"first": True}, 0),
(["t1", "t2"], {"first": False}, ValueError), (["t1", "t2"], {"last": True}, 2),
]) (["t1", "t2"], {"last": False}, 2),
(["t1", "t2"], {"first": False}, ValueError),
],
)
def test_add_pipe_instance(components, kwargs, position): def test_add_pipe_instance(components, kwargs, position):
nlp = Language() nlp = Language()
for name in components: for name in components:
@ -822,7 +825,9 @@ def test_add_pipe_instance(components, kwargs, position):
assert nlp.pipe_names == pipe_names assert nlp.pipe_names == pipe_names
else: else:
with pytest.raises(ValueError): with pytest.raises(ValueError):
result = nlp.add_pipe_instance(evil_component, name="new_component", **kwargs) result = nlp.add_pipe_instance(
evil_component, name="new_component", **kwargs
)
def test_add_pipe_instance_to_bytes(): def test_add_pipe_instance_to_bytes():
@ -831,4 +836,3 @@ def test_add_pipe_instance_to_bytes():
nlp.add_pipe("textcat", name="t2") nlp.add_pipe("textcat", name="t2")
nlp.add_pipe_instance(evil_component, name="new_component") nlp.add_pipe_instance(evil_component, name="new_component")
b = nlp.to_bytes() b = nlp.to_bytes()

View File

@ -415,7 +415,7 @@ def load_model(
enable: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES, enable: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES,
exclude: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES, exclude: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES,
config: Union[Dict[str, Any], Config] = SimpleFrozenDict(), config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
pipe_instances: Dict[str, Any] = SimpleFrozenDict() pipe_instances: Dict[str, Any] = SimpleFrozenDict(),
) -> "Language": ) -> "Language":
"""Load a model from a package or data path. """Load a model from a package or data path.
@ -427,7 +427,7 @@ def load_model(
exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude. exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude.
config (Dict[str, Any] / Config): Config overrides as nested dict or dict config (Dict[str, Any] / Config): Config overrides as nested dict or dict
keyed by section values in dot notation. keyed by section values in dot notation.
pipe_instances (Dict[str, Any]): Dictionary of components pipe_instances (Dict[str, Any]): Dictionary of components
to be added to the pipeline directly (not created from to be added to the pipeline directly (not created from
config) config)
RETURNS (Language): The loaded nlp object. RETURNS (Language): The loaded nlp object.
@ -438,7 +438,7 @@ def load_model(
"enable": enable, "enable": enable,
"exclude": exclude, "exclude": exclude,
"config": config, "config": config,
"pipe_instances": pipe_instances "pipe_instances": pipe_instances,
} }
if isinstance(name, str): # name or string path if isinstance(name, str): # name or string path
if name.startswith("blank:"): # shortcut for blank model if name.startswith("blank:"): # shortcut for blank model
@ -462,7 +462,7 @@ def load_model_from_package(
enable: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES, enable: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES,
exclude: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES, exclude: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES,
config: Union[Dict[str, Any], Config] = SimpleFrozenDict(), config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
pipe_instances: Dict[str, Any] = SimpleFrozenDict() pipe_instances: Dict[str, Any] = SimpleFrozenDict(),
) -> "Language": ) -> "Language":
"""Load a model from an installed package. """Load a model from an installed package.
@ -478,7 +478,7 @@ def load_model_from_package(
components won't be loaded. components won't be loaded.
config (Dict[str, Any] / Config): Config overrides as nested dict or dict config (Dict[str, Any] / Config): Config overrides as nested dict or dict
keyed by section values in dot notation. keyed by section values in dot notation.
pipe_instances (Dict[str, Any]): Dictionary of components pipe_instances (Dict[str, Any]): Dictionary of components
to be added to the pipeline directly (not created from to be added to the pipeline directly (not created from
config) config)
RETURNS (Language): The loaded nlp object. RETURNS (Language): The loaded nlp object.
@ -496,7 +496,7 @@ def load_model_from_path(
enable: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES, enable: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES,
exclude: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES, exclude: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES,
config: Union[Dict[str, Any], Config] = SimpleFrozenDict(), config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
pipe_instances: Dict[str, Any] = SimpleFrozenDict() pipe_instances: Dict[str, Any] = SimpleFrozenDict(),
) -> "Language": ) -> "Language":
"""Load a model from a data directory path. Creates Language class with """Load a model from a data directory path. Creates Language class with
pipeline from config.cfg and then calls from_disk() with path. pipeline from config.cfg and then calls from_disk() with path.
@ -533,7 +533,7 @@ def load_model_from_path(
enable=enable, enable=enable,
exclude=exclude, exclude=exclude,
meta=meta, meta=meta,
pipe_instances=pipe_instances pipe_instances=pipe_instances,
) )
return nlp.from_disk(model_path, exclude=exclude, overrides=overrides) return nlp.from_disk(model_path, exclude=exclude, overrides=overrides)
@ -548,7 +548,7 @@ def load_model_from_config(
exclude: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES, exclude: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES,
auto_fill: bool = False, auto_fill: bool = False,
validate: bool = True, validate: bool = True,
pipe_instances: Dict[str, Any] = SimpleFrozenDict() pipe_instances: Dict[str, Any] = SimpleFrozenDict(),
) -> "Language": ) -> "Language":
"""Create an nlp object from a config. Expects the full config file including """Create an nlp object from a config. Expects the full config file including
a section "nlp" containing the settings for the nlp object. a section "nlp" containing the settings for the nlp object.
@ -588,7 +588,7 @@ def load_model_from_config(
auto_fill=auto_fill, auto_fill=auto_fill,
validate=validate, validate=validate,
meta=meta, meta=meta,
pipe_instances=pipe_instances pipe_instances=pipe_instances,
) )
return nlp return nlp