mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-05 12:50:20 +03:00
Merge 0fd797e33c
into ef20e114e0
This commit is contained in:
commit
708ee32f9f
|
@ -32,6 +32,7 @@ def load(
|
||||||
enable: Union[str, Iterable[str]] = util._DEFAULT_EMPTY_PIPES,
|
enable: Union[str, Iterable[str]] = util._DEFAULT_EMPTY_PIPES,
|
||||||
exclude: Union[str, Iterable[str]] = util._DEFAULT_EMPTY_PIPES,
|
exclude: Union[str, Iterable[str]] = util._DEFAULT_EMPTY_PIPES,
|
||||||
config: Union[Dict[str, Any], Config] = util.SimpleFrozenDict(),
|
config: Union[Dict[str, Any], Config] = util.SimpleFrozenDict(),
|
||||||
|
pipe_instances: Dict[str, Any] = util.SimpleFrozenDict(),
|
||||||
) -> Language:
|
) -> Language:
|
||||||
"""Load a spaCy model from an installed package or a local path.
|
"""Load a spaCy model from an installed package or a local path.
|
||||||
|
|
||||||
|
@ -55,6 +56,7 @@ def load(
|
||||||
enable=enable,
|
enable=enable,
|
||||||
exclude=exclude,
|
exclude=exclude,
|
||||||
config=config,
|
config=config,
|
||||||
|
pipe_instances=pipe_instances,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -219,6 +219,9 @@ class Warnings(metaclass=ErrorsWithCodes):
|
||||||
W125 = ("The StaticVectors key_attr is no longer used. To set a custom "
|
W125 = ("The StaticVectors key_attr is no longer used. To set a custom "
|
||||||
"key attribute for vectors, configure it through Vectors(attr=) or "
|
"key attribute for vectors, configure it through Vectors(attr=) or "
|
||||||
"'spacy init vectors --attr'")
|
"'spacy init vectors --attr'")
|
||||||
|
W126 = ("Pipe instance '{name}' is being added with a vocab "
|
||||||
|
"instance that will not match other components. This is "
|
||||||
|
"usually an error.")
|
||||||
|
|
||||||
|
|
||||||
class Errors(metaclass=ErrorsWithCodes):
|
class Errors(metaclass=ErrorsWithCodes):
|
||||||
|
@ -981,6 +984,7 @@ class Errors(metaclass=ErrorsWithCodes):
|
||||||
" 'min_length': {min_length}, 'max_length': {max_length}")
|
" 'min_length': {min_length}, 'max_length': {max_length}")
|
||||||
E1054 = ("The text, including whitespace, must match between reference and "
|
E1054 = ("The text, including whitespace, must match between reference and "
|
||||||
"predicted docs when training {component}.")
|
"predicted docs when training {component}.")
|
||||||
|
E1055 = ("Cannot create Language instance from config: missing pipeline components. The following components were added by instance (rather than config) via the 'Language.add_pipe_instance()' method, but are not present in the 'pipe_instances' variable: {names}")
|
||||||
|
|
||||||
|
|
||||||
# Deprecated model shortcuts, only used in errors and warnings
|
# Deprecated model shortcuts, only used in errors and warnings
|
||||||
|
|
|
@ -75,6 +75,9 @@ DEFAULT_CONFIG = util.load_config(DEFAULT_CONFIG_PATH)
|
||||||
# This is the base config for the [pretraining] block and currently not included
|
# This is the base config for the [pretraining] block and currently not included
|
||||||
# in the main config and only added via the 'init fill-config' command
|
# in the main config and only added via the 'init fill-config' command
|
||||||
DEFAULT_CONFIG_PRETRAIN_PATH = Path(__file__).parent / "default_config_pretraining.cfg"
|
DEFAULT_CONFIG_PRETRAIN_PATH = Path(__file__).parent / "default_config_pretraining.cfg"
|
||||||
|
# Factory name indicating that the component wasn't constructed by a factory,
|
||||||
|
# and was instead passed by instance
|
||||||
|
INSTANCE_FACTORY_NAME = "__added_by_instance__"
|
||||||
|
|
||||||
# Type variable for contexts piped with documents
|
# Type variable for contexts piped with documents
|
||||||
_AnyContext = TypeVar("_AnyContext")
|
_AnyContext = TypeVar("_AnyContext")
|
||||||
|
@ -771,6 +774,9 @@ class Language:
|
||||||
"""Add a component to the processing pipeline. Valid components are
|
"""Add a component to the processing pipeline. Valid components are
|
||||||
callables that take a `Doc` object, modify it and return it. Only one
|
callables that take a `Doc` object, modify it and return it. Only one
|
||||||
of before/after/first/last can be set. Default behaviour is "last".
|
of before/after/first/last can be set. Default behaviour is "last".
|
||||||
|
Components can be added either by factory name or by instance. If
|
||||||
|
an instance is supplied and you serialize the pipeline, you'll need
|
||||||
|
to also pass an instance into spacy.load() to construct the pipeline.
|
||||||
|
|
||||||
factory_name (str): Name of the component factory.
|
factory_name (str): Name of the component factory.
|
||||||
name (str): Name of pipeline component. Overwrites existing
|
name (str): Name of pipeline component. Overwrites existing
|
||||||
|
@ -818,12 +824,61 @@ class Language:
|
||||||
raw_config=raw_config,
|
raw_config=raw_config,
|
||||||
validate=validate,
|
validate=validate,
|
||||||
)
|
)
|
||||||
pipe_index = self._get_pipe_index(before, after, first, last)
|
|
||||||
self._pipe_meta[name] = self.get_factory_meta(factory_name)
|
self._pipe_meta[name] = self.get_factory_meta(factory_name)
|
||||||
|
pipe_index = self._get_pipe_index(before, after, first, last)
|
||||||
self._components.insert(pipe_index, (name, pipe_component))
|
self._components.insert(pipe_index, (name, pipe_component))
|
||||||
self._link_components()
|
self._link_components()
|
||||||
return pipe_component
|
return pipe_component
|
||||||
|
|
||||||
|
def add_pipe_instance(
    self,
    component: PipeCallable,
    name: Optional[str] = None,
    *,
    before: Optional[Union[str, int]] = None,
    after: Optional[Union[str, int]] = None,
    first: Optional[bool] = None,
    last: Optional[bool] = None,
) -> PipeCallable:
    """Add a component instance to the processing pipeline. Valid components
    are callables that take a `Doc` object, modify it and return it. Only one
    of before/after/first/last can be set. Default behaviour is "last".

    A limitation of this method is that spaCy will not know how to reconstruct
    your pipeline after you save it out (unlike the 'Language.add_pipe()' method,
    where you provide a config and let spaCy construct the instance). See 'spacy.load'
    for details of how to load back a pipeline with components added by instance.

    component (Callable[[Doc], Doc]): The component to add.
    name (str): Name of pipeline component. Overwrites existing
        component.name attribute if available. If no name is set and
        the component exposes no name attribute, component.__name__ is
        used. An error is raised if a name already exists in the pipeline.
    before (Union[str, int]): Name or index of the component to insert new
        component directly before.
    after (Union[str, int]): Name or index of the component to insert new
        component directly after.
    first (bool): If True, insert component first in the pipeline.
    last (bool): If True, insert component last in the pipeline.
    RETURNS (Callable[[Doc], Doc]): The pipeline component.
    RAISES (ValueError): If no name can be determined for the component, or
        if the name is already used by another component in the pipeline.

    DOCS: https://spacy.io/api/language#add_pipe_instance
    """
    # Resolve the name in the documented order: explicit argument, then the
    # component's `name` attribute, then its `__name__`. Defaults are passed
    # to getattr so plain functions (which have no `name`) don't raise
    # AttributeError.
    if name is None:
        name = getattr(component, "name", None)
    if name is None:
        name = getattr(component, "__name__", None)
    if name is None:
        raise ValueError(
            "Can't add pipeline component instance: no name was provided "
            "and the component exposes neither a 'name' nor a '__name__' "
            "attribute. Pass the 'name' argument explicitly."
        )
    if name in self.component_names:
        raise ValueError(Errors.E007.format(name=name, opts=self.component_names))

    # Work out the insertion position before touching any state, so a failure
    # here doesn't leave meta/config entries behind for a component that was
    # never added.
    pipe_index = self._get_pipe_index(before, after, first, last)

    # It would be possible to take arguments for the FactoryMeta here, but we'll then have
    # a problem on deserialization: where will the data be coming from?
    # I think if someone wants that, they should register a component function.
    self._pipe_meta[name] = FactoryMeta(INSTANCE_FACTORY_NAME)
    self._pipe_configs[name] = Config()
    self._components.insert(pipe_index, (name, component))
    # Mirror add_pipe(), which re-links components after every insertion.
    self._link_components()
    return component
|
||||||
|
|
||||||
def _get_pipe_index(
|
def _get_pipe_index(
|
||||||
self,
|
self,
|
||||||
before: Optional[Union[str, int]] = None,
|
before: Optional[Union[str, int]] = None,
|
||||||
|
@ -1735,6 +1790,7 @@ class Language:
|
||||||
meta: Dict[str, Any] = SimpleFrozenDict(),
|
meta: Dict[str, Any] = SimpleFrozenDict(),
|
||||||
auto_fill: bool = True,
|
auto_fill: bool = True,
|
||||||
validate: bool = True,
|
validate: bool = True,
|
||||||
|
pipe_instances: Dict[str, Any] = SimpleFrozenDict(),
|
||||||
) -> "Language":
|
) -> "Language":
|
||||||
"""Create the nlp object from a loaded config. Will set up the tokenizer
|
"""Create the nlp object from a loaded config. Will set up the tokenizer
|
||||||
and language data, add pipeline components etc. If no config is provided,
|
and language data, add pipeline components etc. If no config is provided,
|
||||||
|
@ -1810,6 +1866,11 @@ class Language:
|
||||||
|
|
||||||
# Warn about require_gpu usage in jupyter notebook
|
# Warn about require_gpu usage in jupyter notebook
|
||||||
warn_if_jupyter_cupy()
|
warn_if_jupyter_cupy()
|
||||||
|
# If we've been passed pipe instances, check whether
|
||||||
|
# they have a Vocab instance, and if they do, use
|
||||||
|
# that one. This also performs some additional checks and
|
||||||
|
# warns if there's a mismatch.
|
||||||
|
vocab = _get_instantiated_vocab(vocab, pipe_instances)
|
||||||
|
|
||||||
# Note that we don't load vectors here, instead they get loaded explicitly
|
# Note that we don't load vectors here, instead they get loaded explicitly
|
||||||
# inside stuff like the spacy train function. If we loaded them here,
|
# inside stuff like the spacy train function. If we loaded them here,
|
||||||
|
@ -1826,6 +1887,11 @@ class Language:
|
||||||
interpolated = filled.interpolate() if not filled.is_interpolated else filled
|
interpolated = filled.interpolate() if not filled.is_interpolated else filled
|
||||||
pipeline = interpolated.get("components", {})
|
pipeline = interpolated.get("components", {})
|
||||||
sourced = util.get_sourced_components(interpolated)
|
sourced = util.get_sourced_components(interpolated)
|
||||||
|
# Check for components that aren't in the pipe_instances dict, aren't disabled,
|
||||||
|
# and aren't built by factory.
|
||||||
|
missing_components = _find_missing_components(pipeline, pipe_instances, exclude)
|
||||||
|
if missing_components:
|
||||||
|
raise ValueError(Errors.E1055.format(names=", ".join(missing_components)))
|
||||||
# If components are loaded from a source (existing models), we cache
|
# If components are loaded from a source (existing models), we cache
|
||||||
# them here so they're only loaded once
|
# them here so they're only loaded once
|
||||||
source_nlps = {}
|
source_nlps = {}
|
||||||
|
@ -1835,6 +1901,16 @@ class Language:
|
||||||
if pipe_name not in pipeline:
|
if pipe_name not in pipeline:
|
||||||
opts = ", ".join(pipeline.keys())
|
opts = ", ".join(pipeline.keys())
|
||||||
raise ValueError(Errors.E956.format(name=pipe_name, opts=opts))
|
raise ValueError(Errors.E956.format(name=pipe_name, opts=opts))
|
||||||
|
if pipe_name in pipe_instances:
|
||||||
|
if pipe_name in exclude:
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
nlp.add_pipe_instance(pipe_instances[pipe_name])
|
||||||
|
# Is it important that we instantiate pipes that
|
||||||
|
# aren't excluded? It seems like we would want
|
||||||
|
# the exclude check above. I've left it how it
|
||||||
|
# is though, in case there's some sort of crazy
|
||||||
|
# load-bearing side-effects someone is relying on?
|
||||||
pipe_cfg = util.copy_config(pipeline[pipe_name])
|
pipe_cfg = util.copy_config(pipeline[pipe_name])
|
||||||
raw_config = Config(filled["components"][pipe_name])
|
raw_config = Config(filled["components"][pipe_name])
|
||||||
if pipe_name not in exclude:
|
if pipe_name not in exclude:
|
||||||
|
@ -2337,3 +2413,46 @@ class _Sender:
|
||||||
if self.count >= self.chunk_size:
|
if self.count >= self.chunk_size:
|
||||||
self.count = 0
|
self.count = 0
|
||||||
self.send()
|
self.send()
|
||||||
|
|
||||||
|
|
||||||
|
def _get_instantiated_vocab(
    vocab: Union[bool, Vocab], pipe_instances: Dict[str, Any]
) -> Union[bool, Vocab]:
    """Pick the vocab to use given the `vocab` argument and any pipe instances.

    Only pipe instances whose `vocab` attribute is a `Vocab` are considered.
    If there are none, `vocab` is returned unchanged. If `vocab` is already a
    `Vocab`, it is returned and W126 is emitted for every instance holding a
    different `Vocab` object. Otherwise the first instance's vocab is
    returned and W126 is emitted for every later instance whose vocab is a
    different object.

    vocab (Union[bool, Vocab]): The vocab setting passed by the caller.
    pipe_instances (Dict[str, Any]): Component instances keyed by name.
    RETURNS (Union[bool, Vocab]): The vocab to use.
    """
    found = {}
    for pipe_name, pipe in pipe_instances.items():
        candidate = getattr(pipe, "vocab", None)
        if isinstance(candidate, Vocab):
            found[pipe_name] = candidate

    # No instance carries a vocab: nothing to reconcile.
    if not found:
        return vocab

    if isinstance(vocab, Vocab):
        # An explicit vocab wins; flag every instance that disagrees with it.
        mismatched = [n for n, v in found.items() if v is not vocab]
        for pipe_name in mismatched:
            warnings.warn(Warnings.W126.format(name=pipe_name))
        return vocab

    # No explicit vocab: adopt the first instance's vocab (dict order) and
    # flag any later instance that holds a different object.
    remaining = iter(found.items())
    _, chosen = next(remaining)
    for pipe_name, candidate in remaining:
        if candidate is not chosen:
            warnings.warn(Warnings.W126.format(name=pipe_name))
    return chosen
|
||||||
|
|
||||||
|
|
||||||
|
def _find_missing_components(
|
||||||
|
pipeline: Dict[str, Dict[str, Any]],
|
||||||
|
pipe_instances: Dict[str, Any],
|
||||||
|
exclude: Iterable[str],
|
||||||
|
) -> List[str]:
|
||||||
|
missing = []
|
||||||
|
for name, config in pipeline.items():
|
||||||
|
if (
|
||||||
|
config.get("factory") == INSTANCE_FACTORY_NAME
|
||||||
|
and name not in pipe_instances
|
||||||
|
and name not in exclude
|
||||||
|
):
|
||||||
|
missing.append(name)
|
||||||
|
return missing
|
||||||
|
|
|
@ -800,3 +800,40 @@ def test_component_return():
|
||||||
nlp.add_pipe("test_component_bad_pipe")
|
nlp.add_pipe("test_component_bad_pipe")
|
||||||
with pytest.raises(ValueError, match="instead of a Doc"):
|
with pytest.raises(ValueError, match="instead of a Doc"):
|
||||||
nlp("text")
|
nlp("text")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
    "components,kwargs,position",
    [
        (["t1", "t2"], {"before": "t1"}, 0),
        (["t1", "t2"], {"before": "t2"}, 1),
        (["t1", "t2"], {"after": "t1"}, 1),
        (["t1", "t2"], {"first": True}, 0),
        (["t1", "t2"], {"last": True}, 2),
        # An explicit last=False is expected to still append at the end.
        (["t1", "t2"], {"last": False}, 2),
        # An explicit first=False is expected to be rejected.
        (["t1", "t2"], {"first": False}, ValueError),
    ],
)
def test_add_pipe_instance(components, kwargs, position):
    """Check where add_pipe_instance() inserts a component.

    `position` is either the expected index of the new component in
    `nlp.pipe_names`, or the ValueError class when the call should fail.
    """
    nlp = Language()
    for name in components:
        nlp.add_pipe("textcat", name=name)
    pipe_names = list(nlp.pipe_names)
    if isinstance(position, int):
        result = nlp.add_pipe_instance(evil_component, name="new_component", **kwargs)
        # The instance itself is returned, not a copy or wrapper.
        assert result is evil_component
        pipe_names.insert(position, "new_component")
        assert nlp.pipe_names == pipe_names
    else:
        with pytest.raises(ValueError):
            nlp.add_pipe_instance(evil_component, name="new_component", **kwargs)
        # A rejected call must leave the pipeline untouched.
        assert nlp.pipe_names == pipe_names
|
||||||
|
|
||||||
|
|
||||||
|
def test_add_pipe_instance_to_bytes():
    """Check that a pipeline with a component added by instance serializes."""
    nlp = Language()
    for name in ("t1", "t2"):
        nlp.add_pipe("textcat", name=name)
    nlp.add_pipe_instance(evil_component, name="new_component")
    serialized = nlp.to_bytes()
    # Previously the result was discarded, so the test could only fail by
    # raising; at least check that serialization produced bytes.
    assert isinstance(serialized, bytes)
|
||||||
|
|
|
@ -438,6 +438,7 @@ def load_model(
|
||||||
enable: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES,
|
enable: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES,
|
||||||
exclude: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES,
|
exclude: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES,
|
||||||
config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
|
config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
|
||||||
|
pipe_instances: Dict[str, Any] = SimpleFrozenDict(),
|
||||||
) -> "Language":
|
) -> "Language":
|
||||||
"""Load a model from a package or data path.
|
"""Load a model from a package or data path.
|
||||||
|
|
||||||
|
@ -449,6 +450,9 @@ def load_model(
|
||||||
exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude.
|
exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude.
|
||||||
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
|
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
|
||||||
keyed by section values in dot notation.
|
keyed by section values in dot notation.
|
||||||
|
pipe_instances (Dict[str, Any]): Dictionary of components
|
||||||
|
to be added to the pipeline directly (not created from
|
||||||
|
config)
|
||||||
RETURNS (Language): The loaded nlp object.
|
RETURNS (Language): The loaded nlp object.
|
||||||
"""
|
"""
|
||||||
kwargs = {
|
kwargs = {
|
||||||
|
@ -457,6 +461,7 @@ def load_model(
|
||||||
"enable": enable,
|
"enable": enable,
|
||||||
"exclude": exclude,
|
"exclude": exclude,
|
||||||
"config": config,
|
"config": config,
|
||||||
|
"pipe_instances": pipe_instances,
|
||||||
}
|
}
|
||||||
if isinstance(name, str): # name or string path
|
if isinstance(name, str): # name or string path
|
||||||
if name.startswith("blank:"): # shortcut for blank model
|
if name.startswith("blank:"): # shortcut for blank model
|
||||||
|
@ -480,6 +485,7 @@ def load_model_from_package(
|
||||||
enable: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES,
|
enable: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES,
|
||||||
exclude: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES,
|
exclude: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES,
|
||||||
config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
|
config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
|
||||||
|
pipe_instances: Dict[str, Any] = SimpleFrozenDict(),
|
||||||
) -> "Language":
|
) -> "Language":
|
||||||
"""Load a model from an installed package.
|
"""Load a model from an installed package.
|
||||||
|
|
||||||
|
@ -495,10 +501,13 @@ def load_model_from_package(
|
||||||
components won't be loaded.
|
components won't be loaded.
|
||||||
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
|
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
|
||||||
keyed by section values in dot notation.
|
keyed by section values in dot notation.
|
||||||
|
pipe_instances (Dict[str, Any]): Dictionary of components
|
||||||
|
to be added to the pipeline directly (not created from
|
||||||
|
config)
|
||||||
RETURNS (Language): The loaded nlp object.
|
RETURNS (Language): The loaded nlp object.
|
||||||
"""
|
"""
|
||||||
cls = importlib.import_module(name)
|
cls = importlib.import_module(name)
|
||||||
return cls.load(vocab=vocab, disable=disable, enable=enable, exclude=exclude, config=config) # type: ignore[attr-defined]
|
return cls.load(vocab=vocab, disable=disable, enable=enable, exclude=exclude, config=config, pipe_instances=pipe_instances) # type: ignore[attr-defined]
|
||||||
|
|
||||||
|
|
||||||
def load_model_from_path(
|
def load_model_from_path(
|
||||||
|
@ -510,6 +519,7 @@ def load_model_from_path(
|
||||||
enable: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES,
|
enable: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES,
|
||||||
exclude: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES,
|
exclude: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES,
|
||||||
config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
|
config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
|
||||||
|
pipe_instances: Dict[str, Any] = SimpleFrozenDict(),
|
||||||
) -> "Language":
|
) -> "Language":
|
||||||
"""Load a model from a data directory path. Creates Language class with
|
"""Load a model from a data directory path. Creates Language class with
|
||||||
pipeline from config.cfg and then calls from_disk() with path.
|
pipeline from config.cfg and then calls from_disk() with path.
|
||||||
|
@ -527,6 +537,9 @@ def load_model_from_path(
|
||||||
components won't be loaded.
|
components won't be loaded.
|
||||||
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
|
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
|
||||||
keyed by section values in dot notation.
|
keyed by section values in dot notation.
|
||||||
|
pipe_instances (Dict[str, Any]): Dictionary of components
|
||||||
|
to be added to the pipeline directly (not created from
|
||||||
|
config)
|
||||||
RETURNS (Language): The loaded nlp object.
|
RETURNS (Language): The loaded nlp object.
|
||||||
"""
|
"""
|
||||||
if not model_path.exists():
|
if not model_path.exists():
|
||||||
|
@ -543,6 +556,7 @@ def load_model_from_path(
|
||||||
enable=enable,
|
enable=enable,
|
||||||
exclude=exclude,
|
exclude=exclude,
|
||||||
meta=meta,
|
meta=meta,
|
||||||
|
pipe_instances=pipe_instances,
|
||||||
)
|
)
|
||||||
return nlp.from_disk(model_path, exclude=exclude, overrides=overrides)
|
return nlp.from_disk(model_path, exclude=exclude, overrides=overrides)
|
||||||
|
|
||||||
|
@ -557,6 +571,7 @@ def load_model_from_config(
|
||||||
exclude: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES,
|
exclude: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES,
|
||||||
auto_fill: bool = False,
|
auto_fill: bool = False,
|
||||||
validate: bool = True,
|
validate: bool = True,
|
||||||
|
pipe_instances: Dict[str, Any] = SimpleFrozenDict(),
|
||||||
) -> "Language":
|
) -> "Language":
|
||||||
"""Create an nlp object from a config. Expects the full config file including
|
"""Create an nlp object from a config. Expects the full config file including
|
||||||
a section "nlp" containing the settings for the nlp object.
|
a section "nlp" containing the settings for the nlp object.
|
||||||
|
@ -574,6 +589,9 @@ def load_model_from_config(
|
||||||
components won't be loaded.
|
components won't be loaded.
|
||||||
auto_fill (bool): Whether to auto-fill config with missing defaults.
|
auto_fill (bool): Whether to auto-fill config with missing defaults.
|
||||||
validate (bool): Whether to show config validation errors.
|
validate (bool): Whether to show config validation errors.
|
||||||
|
pipe_instances (Dict[str, Any]): Dictionary of components
|
||||||
|
to be added to the pipeline directly (not created from
|
||||||
|
config)
|
||||||
RETURNS (Language): The loaded nlp object.
|
RETURNS (Language): The loaded nlp object.
|
||||||
"""
|
"""
|
||||||
if "nlp" not in config:
|
if "nlp" not in config:
|
||||||
|
@ -593,6 +611,7 @@ def load_model_from_config(
|
||||||
auto_fill=auto_fill,
|
auto_fill=auto_fill,
|
||||||
validate=validate,
|
validate=validate,
|
||||||
meta=meta,
|
meta=meta,
|
||||||
|
pipe_instances=pipe_instances,
|
||||||
)
|
)
|
||||||
return nlp
|
return nlp
|
||||||
|
|
||||||
|
@ -656,6 +675,7 @@ def load_model_from_init_py(
|
||||||
enable: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES,
|
enable: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES,
|
||||||
exclude: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES,
|
exclude: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES,
|
||||||
config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
|
config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
|
||||||
|
pipe_instances: Dict[str, Any] = SimpleFrozenDict(),
|
||||||
) -> "Language":
|
) -> "Language":
|
||||||
"""Helper function to use in the `load()` method of a model package's
|
"""Helper function to use in the `load()` method of a model package's
|
||||||
__init__.py.
|
__init__.py.
|
||||||
|
@ -671,6 +691,9 @@ def load_model_from_init_py(
|
||||||
components won't be loaded.
|
components won't be loaded.
|
||||||
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
|
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
|
||||||
keyed by section values in dot notation.
|
keyed by section values in dot notation.
|
||||||
|
pipe_instances (Dict[str, Any]): Dictionary of components
|
||||||
|
to be added to the pipeline directly (not created from
|
||||||
|
config)
|
||||||
RETURNS (Language): The loaded nlp object.
|
RETURNS (Language): The loaded nlp object.
|
||||||
"""
|
"""
|
||||||
model_path = Path(init_file).parent
|
model_path = Path(init_file).parent
|
||||||
|
|
Loading…
Reference in New Issue
Block a user