mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 01:16:28 +03:00
Revert disable/disabled merging behavior (#11745)
* Merge disable with disabled. Adjust warnings, errors and tests. * Replace any() with set operation. * Update spacy/tests/pipeline/test_pipe_methods.py Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com> * Update docs. * Remove reference to config entry nlp.enabled from docs. Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
This commit is contained in:
parent
2e3cfd758e
commit
20bbbe3e44
|
@ -212,8 +212,8 @@ class Warnings(metaclass=ErrorsWithCodes):
|
||||||
W121 = ("Attempting to trace non-existent method '{method}' in pipe '{pipe}'")
|
W121 = ("Attempting to trace non-existent method '{method}' in pipe '{pipe}'")
|
||||||
W122 = ("Couldn't trace method '{method}' in pipe '{pipe}'. This can happen if the pipe class "
|
W122 = ("Couldn't trace method '{method}' in pipe '{pipe}'. This can happen if the pipe class "
|
||||||
"is a Cython extension type.")
|
"is a Cython extension type.")
|
||||||
W123 = ("Argument {arg} with value {arg_value} is used instead of {config_value} as specified in the config. Be "
|
W123 = ("Argument `enable` with value {enable} does not contain all values specified in the config option "
|
||||||
"aware that this might affect other components in your pipeline.")
|
"`enabled` ({enabled}). Be aware that this might affect other components in your pipeline.")
|
||||||
|
|
||||||
|
|
||||||
class Errors(metaclass=ErrorsWithCodes):
|
class Errors(metaclass=ErrorsWithCodes):
|
||||||
|
|
|
@ -1879,31 +1879,22 @@ class Language:
|
||||||
if isinstance(exclude, str):
|
if isinstance(exclude, str):
|
||||||
exclude = [exclude]
|
exclude = [exclude]
|
||||||
|
|
||||||
def fetch_pipes_status(value: Iterable[str], key: str) -> Iterable[str]:
|
# `enable` should not be merged with `enabled` (the opposite is true for `disable`/`disabled`). If the config
|
||||||
"""Fetch value for `enable` or `disable` w.r.t. the specified config and passed arguments passed to
|
# specifies values for `enabled` not included in `enable`, emit warning.
|
||||||
.load(). If both arguments and config specified values for this field, the passed arguments take precedence
|
if id(enable) != id(_DEFAULT_EMPTY_PIPES):
|
||||||
and a warning is printed.
|
enabled = config["nlp"].get("enabled", [])
|
||||||
value (Iterable[str]): Passed value for `enable` or `disable`.
|
if len(enabled) and not set(enabled).issubset(enable):
|
||||||
key (str): Key for field in config (either "enabled" or "disabled").
|
|
||||||
RETURN (Iterable[str]):
|
|
||||||
"""
|
|
||||||
# We assume that no argument was passed if the value is the specified default value.
|
|
||||||
if id(value) == id(_DEFAULT_EMPTY_PIPES):
|
|
||||||
return config["nlp"].get(key, [])
|
|
||||||
else:
|
|
||||||
if len(config["nlp"].get(key, [])):
|
|
||||||
warnings.warn(
|
warnings.warn(
|
||||||
Warnings.W123.format(
|
Warnings.W123.format(
|
||||||
arg=key[:-1],
|
enable=enable,
|
||||||
arg_value=value,
|
enabled=enabled,
|
||||||
config_value=config["nlp"][key],
|
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
return value
|
|
||||||
|
|
||||||
|
# Ensure sets of disabled/enabled pipe names are not contradictory.
|
||||||
disabled_pipes = cls._resolve_component_status(
|
disabled_pipes = cls._resolve_component_status(
|
||||||
fetch_pipes_status(disable, "disabled"),
|
list({*disable, *config["nlp"].get("disabled", [])}),
|
||||||
fetch_pipes_status(enable, "enabled"),
|
enable,
|
||||||
config["nlp"]["pipeline"],
|
config["nlp"]["pipeline"],
|
||||||
)
|
)
|
||||||
nlp._disabled = set(p for p in disabled_pipes if p not in exclude)
|
nlp._disabled = set(p for p in disabled_pipes if p not in exclude)
|
||||||
|
@ -2084,10 +2075,12 @@ class Language:
|
||||||
if enable:
|
if enable:
|
||||||
if isinstance(enable, str):
|
if isinstance(enable, str):
|
||||||
enable = [enable]
|
enable = [enable]
|
||||||
to_disable = [
|
to_disable = {
|
||||||
pipe_name for pipe_name in pipe_names if pipe_name not in enable
|
*[pipe_name for pipe_name in pipe_names if pipe_name not in enable],
|
||||||
]
|
*disable,
|
||||||
if disable and disable != to_disable:
|
}
|
||||||
|
# If any pipe to be enabled is in to_disable, the specification is inconsistent.
|
||||||
|
if len(set(enable) & to_disable):
|
||||||
raise ValueError(Errors.E1042.format(enable=enable, disable=disable))
|
raise ValueError(Errors.E1042.format(enable=enable, disable=disable))
|
||||||
|
|
||||||
return tuple(to_disable)
|
return tuple(to_disable)
|
||||||
|
|
|
@ -615,20 +615,18 @@ def test_enable_disable_conflict_with_config():
|
||||||
|
|
||||||
with make_tempdir() as tmp_dir:
|
with make_tempdir() as tmp_dir:
|
||||||
nlp.to_disk(tmp_dir)
|
nlp.to_disk(tmp_dir)
|
||||||
# Expected to fail, as config and arguments conflict.
|
# Expected to succeed, as config and arguments do not conflict.
|
||||||
with pytest.raises(ValueError):
|
assert spacy.load(
|
||||||
spacy.load(
|
|
||||||
tmp_dir, enable=["tagger"], config={"nlp": {"disabled": ["senter"]}}
|
tmp_dir, enable=["tagger"], config={"nlp": {"disabled": ["senter"]}}
|
||||||
)
|
).disabled == ["senter", "sentencizer"]
|
||||||
# Expected to succeed without warning due to the lack of a conflicting config option.
|
# Expected to succeed without warning due to the lack of a conflicting config option.
|
||||||
spacy.load(tmp_dir, enable=["tagger"])
|
spacy.load(tmp_dir, enable=["tagger"])
|
||||||
# Expected to succeed with a warning, as disable=[] should override the config setting.
|
# Expected to fail due to conflict between enable and disabled.
|
||||||
with pytest.warns(UserWarning):
|
with pytest.raises(ValueError):
|
||||||
spacy.load(
|
spacy.load(
|
||||||
tmp_dir,
|
tmp_dir,
|
||||||
enable=["tagger"],
|
enable=["senter"],
|
||||||
disable=[],
|
config={"nlp": {"disabled": ["senter", "tagger"]}},
|
||||||
config={"nlp": {"disabled": ["senter"]}},
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -404,11 +404,10 @@ def test_serialize_pipeline_disable_enable():
|
||||||
assert nlp3.component_names == ["ner", "tagger"]
|
assert nlp3.component_names == ["ner", "tagger"]
|
||||||
with make_tempdir() as d:
|
with make_tempdir() as d:
|
||||||
nlp3.to_disk(d)
|
nlp3.to_disk(d)
|
||||||
with pytest.warns(UserWarning):
|
|
||||||
nlp4 = spacy.load(d, disable=["ner"])
|
nlp4 = spacy.load(d, disable=["ner"])
|
||||||
assert nlp4.pipe_names == ["tagger"]
|
assert nlp4.pipe_names == []
|
||||||
assert nlp4.component_names == ["ner", "tagger"]
|
assert nlp4.component_names == ["ner", "tagger"]
|
||||||
assert nlp4.disabled == ["ner"]
|
assert nlp4.disabled == ["ner", "tagger"]
|
||||||
with make_tempdir() as d:
|
with make_tempdir() as d:
|
||||||
nlp.to_disk(d)
|
nlp.to_disk(d)
|
||||||
nlp5 = spacy.load(d, exclude=["tagger"])
|
nlp5 = spacy.load(d, exclude=["tagger"])
|
||||||
|
|
|
@ -64,12 +64,12 @@ spaCy loads a model under the hood based on its
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| ------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
| ------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| `config` | The loaded config. ~~Union[Dict[str, Any], Config]~~ |
|
| `config` | The loaded config. ~~Union[Dict[str, Any], Config]~~ |
|
||||||
| _keyword-only_ | |
|
| _keyword-only_ | |
|
||||||
| `vocab` | A `Vocab` object. If `True`, a vocab is created using the default language data settings. ~~Vocab~~ |
|
| `vocab` | A `Vocab` object. If `True`, a vocab is created using the default language data settings. ~~Vocab~~ |
|
||||||
| `disable` | Name(s) of pipeline component(s) to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [`nlp.enable_pipe`](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~ |
|
| `disable` | Name(s) of pipeline component(s) to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [`nlp.enable_pipe`](/api/language#enable_pipe). The given names are merged with those in the config entry `nlp.disabled`. ~~Union[str, Iterable[str]]~~ |
|
||||||
| `enable` <Tag variant="new">3.4</Tag> | Name(s) of pipeline component(s) to [enable](/usage/processing-pipelines#disabling). All other pipes will be disabled, but can be enabled again using [`nlp.enable_pipe`](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~ |
|
| `enable` <Tag variant="new">3.4</Tag> | Name(s) of pipeline component(s) to [enable](/usage/processing-pipelines#disabling). All other pipes will be disabled, but can be enabled again using [nlp.enable_pipe](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~ |
|
||||||
| `exclude` | Name(s) of pipeline component(s) to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~Union[str, Iterable[str]]~~ |
|
| `exclude` | Name(s) of pipeline component(s) to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~Union[str, Iterable[str]]~~ |
|
||||||
| `meta` | [Meta data](/api/data-formats#meta) overrides. ~~Dict[str, Any]~~ |
|
| `meta` | [Meta data](/api/data-formats#meta) overrides. ~~Dict[str, Any]~~ |
|
||||||
| `auto_fill` | Whether to automatically fill in missing values in the config, based on defaults and function argument annotations. Defaults to `True`. ~~bool~~ |
|
| `auto_fill` | Whether to automatically fill in missing values in the config, based on defaults and function argument annotations. Defaults to `True`. ~~bool~~ |
|
||||||
|
|
|
@ -46,11 +46,11 @@ specified separately using the new `exclude` keyword argument.
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| ------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
| ------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| `name` | Pipeline to load, i.e. package name or path. ~~Union[str, Path]~~ |
|
| `name` | Pipeline to load, i.e. package name or path. ~~Union[str, Path]~~ |
|
||||||
| _keyword-only_ | |
|
| _keyword-only_ | |
|
||||||
| `vocab` | Optional shared vocab to pass in on initialization. If `True` (default), a new `Vocab` object will be created. ~~Union[Vocab, bool]~~ |
|
| `vocab` | Optional shared vocab to pass in on initialization. If `True` (default), a new `Vocab` object will be created. ~~Union[Vocab, bool]~~ |
|
||||||
| `disable` | Name(s) of pipeline component(s) to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [nlp.enable_pipe](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~ |
|
| `disable` | Name(s) of pipeline component(s) to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [nlp.enable_pipe](/api/language#enable_pipe). The given names are merged with those in the config entry `nlp.disabled`. ~~Union[str, Iterable[str]]~~ |
|
||||||
| `enable` <Tag variant="new">3.4</Tag> | Name(s) of pipeline component(s) to [enable](/usage/processing-pipelines#disabling). All other pipes will be disabled. ~~Union[str, Iterable[str]]~~ |
|
| `enable` <Tag variant="new">3.4</Tag> | Name(s) of pipeline component(s) to [enable](/usage/processing-pipelines#disabling). All other pipes will be disabled. ~~Union[str, Iterable[str]]~~ |
|
||||||
| `exclude` <Tag variant="new">3</Tag> | Name(s) of pipeline component(s) to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~Union[str, Iterable[str]]~~ |
|
| `exclude` <Tag variant="new">3</Tag> | Name(s) of pipeline component(s) to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~Union[str, Iterable[str]]~~ |
|
||||||
| `config` <Tag variant="new">3</Tag> | Optional config overrides, either as nested dict or dict keyed by section value in dot notation, e.g. `"components.name.value"`. ~~Union[Dict[str, Any], Config]~~ |
|
| `config` <Tag variant="new">3</Tag> | Optional config overrides, either as nested dict or dict keyed by section value in dot notation, e.g. `"components.name.value"`. ~~Union[Dict[str, Any], Config]~~ |
|
||||||
|
|
|
@ -363,7 +363,8 @@ nlp.enable_pipe("tagger")
|
||||||
```
|
```
|
||||||
|
|
||||||
In addition to `disable`, `spacy.load()` also accepts `enable`. If `enable` is
|
In addition to `disable`, `spacy.load()` also accepts `enable`. If `enable` is
|
||||||
set, all components except for those in `enable` are disabled.
|
set, all components except for those in `enable` are disabled. If `enable` and
|
||||||
|
`disable` conflict (i.e. the same component is included in both), an error is raised.
|
||||||
|
|
||||||
```python
|
```python
|
||||||
# Load the complete pipeline, but disable all components except for tok2vec and tagger
|
# Load the complete pipeline, but disable all components except for tok2vec and tagger
|
||||||
|
|
Loading…
Reference in New Issue
Block a user