Allow string argument for disable/enable/exclude (#11406)

* adding unit test for spacy.load with disable/exclude string arg

* allow pure strings in from_config

* update docs

* upstream type adjustments

* docs update

* make docstring more consistent

* Update spacy/language.py

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>

* two more cleanups

* fix type in internal method

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
This commit is contained in:
Sofie Van Landeghem 2022-08-31 09:02:34 +02:00 committed by GitHub
parent 3f4b4b7b4f
commit 8fc0efc502
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 112 additions and 88 deletions

View File

@ -31,21 +31,21 @@ def load(
name: Union[str, Path],
*,
vocab: Union[Vocab, bool] = True,
disable: Iterable[str] = util.SimpleFrozenList(),
enable: Iterable[str] = util.SimpleFrozenList(),
exclude: Iterable[str] = util.SimpleFrozenList(),
disable: Union[str, Iterable[str]] = util.SimpleFrozenList(),
enable: Union[str, Iterable[str]] = util.SimpleFrozenList(),
exclude: Union[str, Iterable[str]] = util.SimpleFrozenList(),
config: Union[Dict[str, Any], Config] = util.SimpleFrozenDict(),
) -> Language:
"""Load a spaCy model from an installed package or a local path.
name (str): Package name or model path.
vocab (Vocab): A Vocab object. If True, a vocab is created.
disable (Iterable[str]): Names of pipeline components to disable. Disabled
disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable. Disabled
pipes will be loaded but they won't be run unless you explicitly
enable them by calling nlp.enable_pipe.
enable (Iterable[str]): Names of pipeline components to enable. All other
enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other
pipes will be disabled (but can be enabled later using nlp.enable_pipe).
exclude (Iterable[str]): Names of pipeline components to exclude. Excluded
exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude. Excluded
components won't be loaded.
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
keyed by section values in dot notation.

View File

@ -1063,7 +1063,7 @@ class Language:
"""
if enable is None and disable is None:
raise ValueError(Errors.E991)
if disable is not None and isinstance(disable, str):
if isinstance(disable, str):
disable = [disable]
if enable is not None:
if isinstance(enable, str):
@ -1698,9 +1698,9 @@ class Language:
config: Union[Dict[str, Any], Config] = {},
*,
vocab: Union[Vocab, bool] = True,
disable: Iterable[str] = SimpleFrozenList(),
enable: Iterable[str] = SimpleFrozenList(),
exclude: Iterable[str] = SimpleFrozenList(),
disable: Union[str, Iterable[str]] = SimpleFrozenList(),
enable: Union[str, Iterable[str]] = SimpleFrozenList(),
exclude: Union[str, Iterable[str]] = SimpleFrozenList(),
meta: Dict[str, Any] = SimpleFrozenDict(),
auto_fill: bool = True,
validate: bool = True,
@ -1711,12 +1711,12 @@ class Language:
config (Dict[str, Any] / Config): The loaded config.
vocab (Vocab): A Vocab object. If True, a vocab is created.
disable (Iterable[str]): Names of pipeline components to disable.
disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable.
Disabled pipes will be loaded but they won't be run unless you
explicitly enable them by calling nlp.enable_pipe.
enable (Iterable[str]): Names of pipeline components to enable. All other
enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other
pipes will be disabled (and can be enabled using `nlp.enable_pipe`).
exclude (Iterable[str]): Names of pipeline components to exclude.
exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude.
Excluded components won't be loaded.
meta (Dict[str, Any]): Meta overrides for nlp.meta.
auto_fill (bool): Automatically fill in missing values in config based
@ -1727,6 +1727,12 @@ class Language:
DOCS: https://spacy.io/api/language#from_config
"""
if isinstance(disable, str):
disable = [disable]
if isinstance(enable, str):
enable = [enable]
if isinstance(exclude, str):
exclude = [exclude]
if auto_fill:
config = Config(
cls.default_config, section_order=CONFIG_SECTION_ORDER
@ -2031,25 +2037,29 @@ class Language:
@staticmethod
def _resolve_component_status(
disable: Iterable[str], enable: Iterable[str], pipe_names: Collection[str]
disable: Union[str, Iterable[str]],
enable: Union[str, Iterable[str]],
pipe_names: Iterable[str],
) -> Tuple[str, ...]:
"""Derives whether (1) `disable` and `enable` values are consistent and (2)
resolves those to a single set of disabled components. Raises an error in
case of inconsistency.
disable (Iterable[str]): Names of components or serialization fields to disable.
enable (Iterable[str]): Names of pipeline components to enable.
disable (Union[str, Iterable[str]]): Name(s) of component(s) or serialization fields to disable.
enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable.
pipe_names (Iterable[str]): Names of all pipeline components.
RETURNS (Tuple[str, ...]): Names of components to exclude from pipeline w.r.t.
specified includes and excludes.
"""
if disable is not None and isinstance(disable, str):
if isinstance(disable, str):
disable = [disable]
to_disable = disable
if enable:
if isinstance(enable, str):
enable = [enable]
to_disable = [
pipe_name for pipe_name in pipe_names if pipe_name not in enable
]

View File

@ -618,6 +618,7 @@ def test_load_disable_enable() -> None:
base_nlp.to_disk(tmp_dir)
to_disable = ["parser", "tagger"]
to_enable = ["tagger", "parser"]
single_str = "tagger"
# Setting only `disable`.
nlp = spacy.load(tmp_dir, disable=to_disable)
@ -632,6 +633,16 @@ def test_load_disable_enable() -> None:
]
)
# Loading with a string representing one component
nlp = spacy.load(tmp_dir, exclude=single_str)
assert single_str not in nlp.component_names
nlp = spacy.load(tmp_dir, disable=single_str)
assert single_str in nlp.component_names
assert single_str not in nlp.pipe_names
assert nlp._disabled == {single_str}
assert nlp.disabled == [single_str]
# Testing consistent enable/disable combination.
nlp = spacy.load(
tmp_dir,

View File

@ -398,9 +398,9 @@ def load_model(
name: Union[str, Path],
*,
vocab: Union["Vocab", bool] = True,
disable: Iterable[str] = SimpleFrozenList(),
enable: Iterable[str] = SimpleFrozenList(),
exclude: Iterable[str] = SimpleFrozenList(),
disable: Union[str, Iterable[str]] = SimpleFrozenList(),
enable: Union[str, Iterable[str]] = SimpleFrozenList(),
exclude: Union[str, Iterable[str]] = SimpleFrozenList(),
config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
) -> "Language":
"""Load a model from a package or data path.
@ -408,9 +408,9 @@ def load_model(
name (str): Package name or model path.
vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
a new Vocab object will be created.
disable (Iterable[str]): Names of pipeline components to disable.
enable (Iterable[str]): Names of pipeline components to enable. All others will be disabled.
exclude (Iterable[str]): Names of pipeline components to exclude.
disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable.
enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All others will be disabled.
exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude.
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
keyed by section values in dot notation.
RETURNS (Language): The loaded nlp object.
@ -440,9 +440,9 @@ def load_model_from_package(
name: str,
*,
vocab: Union["Vocab", bool] = True,
disable: Iterable[str] = SimpleFrozenList(),
enable: Iterable[str] = SimpleFrozenList(),
exclude: Iterable[str] = SimpleFrozenList(),
disable: Union[str, Iterable[str]] = SimpleFrozenList(),
enable: Union[str, Iterable[str]] = SimpleFrozenList(),
exclude: Union[str, Iterable[str]] = SimpleFrozenList(),
config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
) -> "Language":
"""Load a model from an installed package.
@ -450,12 +450,12 @@ def load_model_from_package(
name (str): The package name.
vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
a new Vocab object will be created.
disable (Iterable[str]): Names of pipeline components to disable. Disabled
disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable. Disabled
pipes will be loaded but they won't be run unless you explicitly
enable them by calling nlp.enable_pipe.
enable (Iterable[str]): Names of pipeline components to enable. All other
enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other
pipes will be disabled (and can be enabled using `nlp.enable_pipe`).
exclude (Iterable[str]): Names of pipeline components to exclude. Excluded
exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude. Excluded
components won't be loaded.
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
keyed by section values in dot notation.
@ -470,9 +470,9 @@ def load_model_from_path(
*,
meta: Optional[Dict[str, Any]] = None,
vocab: Union["Vocab", bool] = True,
disable: Iterable[str] = SimpleFrozenList(),
enable: Iterable[str] = SimpleFrozenList(),
exclude: Iterable[str] = SimpleFrozenList(),
disable: Union[str, Iterable[str]] = SimpleFrozenList(),
enable: Union[str, Iterable[str]] = SimpleFrozenList(),
exclude: Union[str, Iterable[str]] = SimpleFrozenList(),
config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
) -> "Language":
"""Load a model from a data directory path. Creates Language class with
@ -482,12 +482,12 @@ def load_model_from_path(
meta (Dict[str, Any]): Optional model meta.
vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
a new Vocab object will be created.
disable (Iterable[str]): Names of pipeline components to disable. Disabled
disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable. Disabled
pipes will be loaded but they won't be run unless you explicitly
enable them by calling nlp.enable_pipe.
enable (Iterable[str]): Names of pipeline components to enable. All other
enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other
pipes will be disabled (and can be enabled using `nlp.enable_pipe`).
exclude (Iterable[str]): Names of pipeline components to exclude. Excluded
exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude. Excluded
components won't be loaded.
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
keyed by section values in dot notation.
@ -516,9 +516,9 @@ def load_model_from_config(
*,
meta: Dict[str, Any] = SimpleFrozenDict(),
vocab: Union["Vocab", bool] = True,
disable: Iterable[str] = SimpleFrozenList(),
enable: Iterable[str] = SimpleFrozenList(),
exclude: Iterable[str] = SimpleFrozenList(),
disable: Union[str, Iterable[str]] = SimpleFrozenList(),
enable: Union[str, Iterable[str]] = SimpleFrozenList(),
exclude: Union[str, Iterable[str]] = SimpleFrozenList(),
auto_fill: bool = False,
validate: bool = True,
) -> "Language":
@ -529,12 +529,12 @@ def load_model_from_config(
meta (Dict[str, Any]): Optional model meta.
vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
a new Vocab object will be created.
disable (Iterable[str]): Names of pipeline components to disable. Disabled
disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable. Disabled
pipes will be loaded but they won't be run unless you explicitly
enable them by calling nlp.enable_pipe.
enable (Iterable[str]): Names of pipeline components to enable. All other
enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other
pipes will be disabled (and can be enabled using `nlp.enable_pipe`).
exclude (Iterable[str]): Names of pipeline components to exclude. Excluded
exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude. Excluded
components won't be loaded.
auto_fill (bool): Whether to auto-fill config with missing defaults.
validate (bool): Whether to show config validation errors.
@ -616,9 +616,9 @@ def load_model_from_init_py(
init_file: Union[Path, str],
*,
vocab: Union["Vocab", bool] = True,
disable: Iterable[str] = SimpleFrozenList(),
enable: Iterable[str] = SimpleFrozenList(),
exclude: Iterable[str] = SimpleFrozenList(),
disable: Union[str, Iterable[str]] = SimpleFrozenList(),
enable: Union[str, Iterable[str]] = SimpleFrozenList(),
exclude: Union[str, Iterable[str]] = SimpleFrozenList(),
config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
) -> "Language":
"""Helper function to use in the `load()` method of a model package's
@ -626,12 +626,12 @@ def load_model_from_init_py(
vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
a new Vocab object will be created.
disable (Iterable[str]): Names of pipeline components to disable. Disabled
disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable. Disabled
pipes will be loaded but they won't be run unless you explicitly
enable them by calling nlp.enable_pipe.
enable (Iterable[str]): Names of pipeline components to enable. All other
enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other
pipes will be disabled (and can be enabled using `nlp.enable_pipe`).
exclude (Iterable[str]): Names of pipeline components to exclude. Excluded
exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude. Excluded
components won't be loaded.
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
keyed by section values in dot notation.

View File

@ -64,12 +64,13 @@ spaCy loads a model under the hood based on its
> ```
| Name | Description |
| -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| ------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `config` | The loaded config. ~~Union[Dict[str, Any], Config]~~ |
| _keyword-only_ | |
| `vocab` | A `Vocab` object. If `True`, a vocab is created using the default language data settings. ~~Union[Vocab, bool]~~ |
| `disable` | Names of pipeline components to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [`nlp.enable_pipe`](/api/language#enable_pipe). ~~List[str]~~ |
| `exclude` | Names of pipeline components to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~List[str]~~ |
| `disable` | Name(s) of pipeline component(s) to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [`nlp.enable_pipe`](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~ |
| `enable` <Tag variant="new">3.4</Tag> | Name(s) of pipeline component(s) to [enable](/usage/processing-pipelines#disabling). All other pipes will be disabled, but can be enabled again using [`nlp.enable_pipe`](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~ |
| `exclude` | Name(s) of pipeline component(s) to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~Union[str, Iterable[str]]~~ |
| `meta` | [Meta data](/api/data-formats#meta) overrides. ~~Dict[str, Any]~~ |
| `auto_fill` | Whether to automatically fill in missing values in the config, based on defaults and function argument annotations. Defaults to `True`. ~~bool~~ |
| `validate` | Whether to validate the component config and arguments against the types expected by the factory. Defaults to `True`. ~~bool~~ |
@ -695,8 +696,8 @@ As of spaCy v3.0, the `disable_pipes` method has been renamed to `select_pipes`:
| Name | Description |
| -------------- | ------------------------------------------------------------------------------------------------------ |
| _keyword-only_ | |
| `disable` | Name(s) of pipeline components to disable. ~~Optional[Union[str, Iterable[str]]]~~ |
| `enable` | Name(s) of pipeline components that will not be disabled. ~~Optional[Union[str, Iterable[str]]]~~ |
| `disable` | Name(s) of pipeline component(s) to disable. ~~Optional[Union[str, Iterable[str]]]~~ |
| `enable` | Name(s) of pipeline component(s) that will not be disabled. ~~Optional[Union[str, Iterable[str]]]~~ |
| **RETURNS** | The disabled pipes that can be restored by calling the object's `.restore()` method. ~~DisabledPipes~~ |
## Language.get_factory_meta {#get_factory_meta tag="classmethod" new="3"}

View File

@ -46,13 +46,13 @@ specified separately using the new `exclude` keyword argument.
> ```
| Name | Description |
| ------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| ------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| `name` | Pipeline to load, i.e. package name or path. ~~Union[str, Path]~~ |
| _keyword-only_ | |
| `vocab` | Optional shared vocab to pass in on initialization. If `True` (default), a new `Vocab` object will be created. ~~Union[Vocab, bool]~~ |
| `disable` | Names of pipeline components to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [nlp.enable_pipe](/api/language#enable_pipe). ~~List[str]~~ |
| `enable` | Names of pipeline components to [enable](/usage/processing-pipelines#disabling). All other pipes will be disabled. ~~List[str]~~ |
| `exclude` <Tag variant="new">3</Tag> | Names of pipeline components to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~List[str]~~ |
| `disable` | Name(s) of pipeline component(s) to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [`nlp.enable_pipe`](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~ |
| `enable` <Tag variant="new">3.4</Tag> | Name(s) of pipeline component(s) to [enable](/usage/processing-pipelines#disabling). All other pipes will be disabled. ~~Union[str, Iterable[str]]~~ |
| `exclude` <Tag variant="new">3</Tag> | Name(s) of pipeline component(s) to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~Union[str, Iterable[str]]~~ |
| `config` <Tag variant="new">3</Tag> | Optional config overrides, either as nested dict or dict keyed by section value in dot notation, e.g. `"components.name.value"`. ~~Union[Dict[str, Any], Config]~~ |
| **RETURNS** | A `Language` object with the loaded pipeline. ~~Language~~ |
@ -1050,12 +1050,13 @@ and create a `Language` object. The model data will then be loaded in via
> ```
| Name | Description |
| ------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| ------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `name` | Package name or path. ~~str~~ |
| _keyword-only_ | |
| `vocab` | Optional shared vocab to pass in on initialization. If `True` (default), a new `Vocab` object will be created. ~~Union[Vocab, bool]~~ |
| `disable` | Names of pipeline components to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [`nlp.enable_pipe`](/api/language#enable_pipe). ~~List[str]~~ |
| `exclude` <Tag variant="new">3</Tag> | Names of pipeline components to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~List[str]~~ |
| `disable` | Name(s) of pipeline component(s) to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [`nlp.enable_pipe`](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~ |
| `enable` <Tag variant="new">3.4</Tag> | Name(s) of pipeline component(s) to [enable](/usage/processing-pipelines#disabling). All other pipes will be disabled, but can be enabled again using [`nlp.enable_pipe`](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~ |
| `exclude` <Tag variant="new">3</Tag> | Name(s) of pipeline component(s) to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~Union[str, Iterable[str]]~~ |
| `config` <Tag variant="new">3</Tag> | Config overrides as nested dict or flat dict keyed by section values in dot notation, e.g. `"nlp.pipeline"`. ~~Union[Dict[str, Any], Config]~~ |
| **RETURNS** | `Language` class with the loaded pipeline. ~~Language~~ |
@ -1074,12 +1075,13 @@ A helper function to use in the `load()` method of a pipeline package's
> ```
| Name | Description |
| ------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| ------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `init_file` | Path to package's `__init__.py`, i.e. `__file__`. ~~Union[str, Path]~~ |
| _keyword-only_ | |
| `vocab` <Tag variant="new">3</Tag> | Optional shared vocab to pass in on initialization. If `True` (default), a new `Vocab` object will be created. ~~Union[Vocab, bool]~~ |
| `disable` | Names of pipeline components to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [nlp.enable_pipe](/api/language#enable_pipe). ~~List[str]~~ |
| `exclude` <Tag variant="new">3</Tag> | Names of pipeline components to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~List[str]~~ |
| `disable` | Name(s) of pipeline component(s) to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [`nlp.enable_pipe`](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~ |
| `enable` <Tag variant="new">3.4</Tag> | Name(s) of pipeline component(s) to [enable](/usage/processing-pipelines#disabling). All other pipes will be disabled, but can be enabled again using [`nlp.enable_pipe`](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~ |
| `exclude` <Tag variant="new">3</Tag> | Name(s) of pipeline component(s) to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~Union[str, Iterable[str]]~~ |
| `config` <Tag variant="new">3</Tag> | Config overrides as nested dict or flat dict keyed by section values in dot notation, e.g. `"nlp.pipeline"`. ~~Union[Dict[str, Any], Config]~~ |
| **RETURNS** | `Language` class with the loaded pipeline. ~~Language~~ |