mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-26 13:41:21 +03:00 
			
		
		
		
	Revert disable/disabled merging behavior (#11745)
* Merge disable with disabled. Adjust warnings, errors and tests. * Replace any() with set operation. * Update spacy/tests/pipeline/test_pipe_methods.py Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com> * Update docs. * Remove reference to config entry nlp.enabled from docs. Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
This commit is contained in:
		
							parent
							
								
									2e3cfd758e
								
							
						
					
					
						commit
						20bbbe3e44
					
				|  | @ -212,8 +212,8 @@ class Warnings(metaclass=ErrorsWithCodes): | |||
|     W121 = ("Attempting to trace non-existent method '{method}' in pipe '{pipe}'") | ||||
|     W122 = ("Couldn't trace method '{method}' in pipe '{pipe}'. This can happen if the pipe class " | ||||
|             "is a Cython extension type.") | ||||
|     W123 = ("Argument {arg} with value {arg_value} is used instead of {config_value} as specified in the config. Be " | ||||
|             "aware that this might affect other components in your pipeline.") | ||||
|     W123 = ("Argument `enable` with value {enable} does not contain all values specified in the config option " | ||||
|             "`enabled` ({enabled}). Be aware that this might affect other components in your pipeline.") | ||||
| 
 | ||||
| 
 | ||||
| class Errors(metaclass=ErrorsWithCodes): | ||||
|  |  | |||
|  | @ -1879,31 +1879,22 @@ class Language: | |||
|         if isinstance(exclude, str): | ||||
|             exclude = [exclude] | ||||
| 
 | ||||
|         def fetch_pipes_status(value: Iterable[str], key: str) -> Iterable[str]: | ||||
|             """Fetch value for `enable` or `disable` w.r.t. the specified config and passed arguments passed to | ||||
|             .load(). If both arguments and config specified values for this field, the passed arguments take precedence | ||||
|             and a warning is printed. | ||||
|             value (Iterable[str]): Passed value for `enable` or `disable`. | ||||
|             key (str): Key for field in config (either "enabled" or "disabled"). | ||||
|             RETURN (Iterable[str]): | ||||
|             """ | ||||
|             # We assume that no argument was passed if the value is the specified default value. | ||||
|             if id(value) == id(_DEFAULT_EMPTY_PIPES): | ||||
|                 return config["nlp"].get(key, []) | ||||
|             else: | ||||
|                 if len(config["nlp"].get(key, [])): | ||||
|                     warnings.warn( | ||||
|                         Warnings.W123.format( | ||||
|                             arg=key[:-1], | ||||
|                             arg_value=value, | ||||
|                             config_value=config["nlp"][key], | ||||
|                         ) | ||||
|         # `enable` should not be merged with `enabled` (the opposite is true for `disable`/`disabled`). If the config | ||||
|         # specifies values for `enabled` not included in `enable`, emit warning. | ||||
|         if id(enable) != id(_DEFAULT_EMPTY_PIPES): | ||||
|             enabled = config["nlp"].get("enabled", []) | ||||
|             if len(enabled) and not set(enabled).issubset(enable): | ||||
|                 warnings.warn( | ||||
|                     Warnings.W123.format( | ||||
|                         enable=enable, | ||||
|                         enabled=enabled, | ||||
|                     ) | ||||
|                 return value | ||||
|                 ) | ||||
| 
 | ||||
|         # Ensure sets of disabled/enabled pipe names are not contradictory. | ||||
|         disabled_pipes = cls._resolve_component_status( | ||||
|             fetch_pipes_status(disable, "disabled"), | ||||
|             fetch_pipes_status(enable, "enabled"), | ||||
|             list({*disable, *config["nlp"].get("disabled", [])}), | ||||
|             enable, | ||||
|             config["nlp"]["pipeline"], | ||||
|         ) | ||||
|         nlp._disabled = set(p for p in disabled_pipes if p not in exclude) | ||||
|  | @ -2084,10 +2075,12 @@ class Language: | |||
|         if enable: | ||||
|             if isinstance(enable, str): | ||||
|                 enable = [enable] | ||||
|             to_disable = [ | ||||
|                 pipe_name for pipe_name in pipe_names if pipe_name not in enable | ||||
|             ] | ||||
|             if disable and disable != to_disable: | ||||
|             to_disable = { | ||||
|                 *[pipe_name for pipe_name in pipe_names if pipe_name not in enable], | ||||
|                 *disable, | ||||
|             } | ||||
|             # If any pipe to be enabled is in to_disable, the specification is inconsistent. | ||||
|             if len(set(enable) & to_disable): | ||||
|                 raise ValueError(Errors.E1042.format(enable=enable, disable=disable)) | ||||
| 
 | ||||
|         return tuple(to_disable) | ||||
|  |  | |||
|  | @ -615,20 +615,18 @@ def test_enable_disable_conflict_with_config(): | |||
| 
 | ||||
|     with make_tempdir() as tmp_dir: | ||||
|         nlp.to_disk(tmp_dir) | ||||
|         # Expected to fail, as config and arguments conflict. | ||||
|         with pytest.raises(ValueError): | ||||
|             spacy.load( | ||||
|                 tmp_dir, enable=["tagger"], config={"nlp": {"disabled": ["senter"]}} | ||||
|             ) | ||||
|         # Expected to succeed, as config and arguments do not conflict. | ||||
|         assert spacy.load( | ||||
|             tmp_dir, enable=["tagger"], config={"nlp": {"disabled": ["senter"]}} | ||||
|         ).disabled == ["senter", "sentencizer"] | ||||
|         # Expected to succeed without warning due to the lack of a conflicting config option. | ||||
|         spacy.load(tmp_dir, enable=["tagger"]) | ||||
|         # Expected to succeed with a warning, as disable=[] should override the config setting. | ||||
|         with pytest.warns(UserWarning): | ||||
|         # Expected to fail due to conflict between enable and disabled. | ||||
|         with pytest.raises(ValueError): | ||||
|             spacy.load( | ||||
|                 tmp_dir, | ||||
|                 enable=["tagger"], | ||||
|                 disable=[], | ||||
|                 config={"nlp": {"disabled": ["senter"]}}, | ||||
|                 enable=["senter"], | ||||
|                 config={"nlp": {"disabled": ["senter", "tagger"]}}, | ||||
|             ) | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
|  | @ -404,11 +404,10 @@ def test_serialize_pipeline_disable_enable(): | |||
|     assert nlp3.component_names == ["ner", "tagger"] | ||||
|     with make_tempdir() as d: | ||||
|         nlp3.to_disk(d) | ||||
|         with pytest.warns(UserWarning): | ||||
|             nlp4 = spacy.load(d, disable=["ner"]) | ||||
|     assert nlp4.pipe_names == ["tagger"] | ||||
|         nlp4 = spacy.load(d, disable=["ner"]) | ||||
|     assert nlp4.pipe_names == [] | ||||
|     assert nlp4.component_names == ["ner", "tagger"] | ||||
|     assert nlp4.disabled == ["ner"] | ||||
|     assert nlp4.disabled == ["ner", "tagger"] | ||||
|     with make_tempdir() as d: | ||||
|         nlp.to_disk(d) | ||||
|         nlp5 = spacy.load(d, exclude=["tagger"]) | ||||
|  |  | |||
|  | @ -63,18 +63,18 @@ spaCy loads a model under the hood based on its | |||
| > nlp = Language.from_config(config) | ||||
| > ``` | ||||
| 
 | ||||
| | Name                                  | Description                                                                                                                                                                                                                                                          | | ||||
| | ------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ||||
| | `config`                              | The loaded config. ~~Union[Dict[str, Any], Config]~~                                                                                                                                                                                                                 | | ||||
| | _keyword-only_                        |                                                                                                                                                                                                                                                                      | | ||||
| | `vocab`                               | A `Vocab` object. If `True`, a vocab is created using the default language data settings. ~~Vocab~~                                                                                                                                                                  | | ||||
| | `disable`                             | Name(s) of pipeline component(s) to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [`nlp.enable_pipe`](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~ | | ||||
| | `enable` <Tag variant="new">3.4</Tag> | Name(s) of pipeline component(s) to [enable](/usage/processing-pipelines#disabling). All other pipes will be disabled, but can be enabled again using [`nlp.enable_pipe`](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~                                  | | ||||
| | `exclude`                             | Name(s) of pipeline component(s) to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~Union[str, Iterable[str]]~~                                                                                                             | | ||||
| | `meta`                                | [Meta data](/api/data-formats#meta) overrides. ~~Dict[str, Any]~~                                                                                                                                                                                                    | | ||||
| | `auto_fill`                           | Whether to automatically fill in missing values in the config, based on defaults and function argument annotations. Defaults to `True`. ~~bool~~                                                                                                                     | | ||||
| | `validate`                            | Whether to validate the component config and arguments against the types expected by the factory. Defaults to `True`. ~~bool~~                                                                                                                                       | | ||||
| | **RETURNS**                           | The initialized object. ~~Language~~                                                                                                                                                                                                                                 | | ||||
| | Name                                  | Description                                                                                                                                                                                                                                                                                                      | | ||||
| | ------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ||||
| | `config`                              | The loaded config. ~~Union[Dict[str, Any], Config]~~                                                                                                                                                                                                                                                             | | ||||
| | _keyword-only_                        |                                                                                                                                                                                                                                                                                                                  | | ||||
| | `vocab`                               | A `Vocab` object. If `True`, a vocab is created using the default language data settings. ~~Vocab~~                                                                                                                                                                                                              | | ||||
| | `disable`                             | Name(s) of pipeline component(s) to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [nlp.enable_pipe](/api/language#enable_pipe). Is merged with the config entry `nlp.disabled`. ~~Union[str, Iterable[str]]~~ | | ||||
| | `enable` <Tag variant="new">3.4</Tag> | Name(s) of pipeline component(s) to [enable](/usage/processing-pipelines#disabling). All other pipes will be disabled, but can be enabled again using [nlp.enable_pipe](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~                                       | | ||||
| | `exclude`                             | Name(s) of pipeline component(s) to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~Union[str, Iterable[str]]~~                                                                                                                                                         | | ||||
| | `meta`                                | [Meta data](/api/data-formats#meta) overrides. ~~Dict[str, Any]~~                                                                                                                                                                                                                                                | | ||||
| | `auto_fill`                           | Whether to automatically fill in missing values in the config, based on defaults and function argument annotations. Defaults to `True`. ~~bool~~                                                                                                                                                                 | | ||||
| | `validate`                            | Whether to validate the component config and arguments against the types expected by the factory. Defaults to `True`. ~~bool~~                                                                                                                                                                                   | | ||||
| | **RETURNS**                           | The initialized object. ~~Language~~                                                                                                                                                                                                                                                                             | | ||||
| 
 | ||||
| ## Language.component {#component tag="classmethod" new="3"} | ||||
| 
 | ||||
|  |  | |||
|  | @ -45,16 +45,16 @@ specified separately using the new `exclude` keyword argument. | |||
| > nlp = spacy.load("en_core_web_sm", exclude=["parser", "tagger"]) | ||||
| > ``` | ||||
| 
 | ||||
| | Name                                  | Description                                                                                                                                                                                                                                                        | | ||||
| | ------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | ||||
| | `name`                                | Pipeline to load, i.e. package name or path. ~~Union[str, Path]~~                                                                                                                                                                                                  | | ||||
| | _keyword-only_                        |                                                                                                                                                                                                                                                                    | | ||||
| | `vocab`                               | Optional shared vocab to pass in on initialization. If `True` (default), a new `Vocab` object will be created. ~~Union[Vocab, bool]~~                                                                                                                              | | ||||
| | `disable`                             | Name(s) of pipeline component(s) to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [nlp.enable_pipe](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~ | | ||||
| | `enable` <Tag variant="new">3.4</Tag> | Name(s) of pipeline component(s) to [enable](/usage/processing-pipelines#disabling). All other pipes will be disabled. ~~Union[str, Iterable[str]]~~                                                                                                               | | ||||
| | `exclude` <Tag variant="new">3</Tag>  | Name(s) of pipeline component(s) to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~Union[str, Iterable[str]]~~                                                                                                           | | ||||
| | `config` <Tag variant="new">3</Tag>   | Optional config overrides, either as nested dict or dict keyed by section value in dot notation, e.g. `"components.name.value"`. ~~Union[Dict[str, Any], Config]~~                                                                                                 | | ||||
| | **RETURNS**                           | A `Language` object with the loaded pipeline. ~~Language~~                                                                                                                                                                                                         | | ||||
| | Name                                  | Description                                                                                                                                                                                                                                                                                                      | | ||||
| | ------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ||||
| | `name`                                | Pipeline to load, i.e. package name or path. ~~Union[str, Path]~~                                                                                                                                                                                                                                                | | ||||
| | _keyword-only_                        |                                                                                                                                                                                                                                                                                                                  | | ||||
| | `vocab`                               | Optional shared vocab to pass in on initialization. If `True` (default), a new `Vocab` object will be created. ~~Union[Vocab, bool]~~                                                                                                                                                                            | | ||||
| | `disable`                             | Name(s) of pipeline component(s) to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [nlp.enable_pipe](/api/language#enable_pipe). Is merged with the config entry `nlp.disabled`. ~~Union[str, Iterable[str]]~~ | | ||||
| | `enable` <Tag variant="new">3.4</Tag> | Name(s) of pipeline component(s) to [enable](/usage/processing-pipelines#disabling). All other pipes will be disabled. ~~Union[str, Iterable[str]]~~                                                                                                                    | | ||||
| | `exclude` <Tag variant="new">3</Tag>  | Name(s) of pipeline component(s) to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~Union[str, Iterable[str]]~~                                                                                                                                                         | | ||||
| | `config` <Tag variant="new">3</Tag>   | Optional config overrides, either as nested dict or dict keyed by section value in dot notation, e.g. `"components.name.value"`. ~~Union[Dict[str, Any], Config]~~                                                                                                                                               | | ||||
| | **RETURNS**                           | A `Language` object with the loaded pipeline. ~~Language~~                                                                                                                                                                                                                                                       | | ||||
| 
 | ||||
| Essentially, `spacy.load()` is a convenience wrapper that reads the pipeline's | ||||
| [`config.cfg`](/api/data-formats#config), uses the language and pipeline | ||||
|  |  | |||
|  | @ -363,7 +363,8 @@ nlp.enable_pipe("tagger") | |||
| ``` | ||||
| 
 | ||||
| In addition to `disable`, `spacy.load()` also accepts `enable`. If `enable` is | ||||
| set, all components except for those in `enable` are disabled. | ||||
| set, all components except for those in `enable` are disabled. If `enable` and | ||||
| `disable` conflict (i.e. the same component is included in both), an error is raised. | ||||
| 
 | ||||
| ```python | ||||
| # Load the complete pipeline, but disable all components except for tok2vec and tagger | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user