mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-29 23:17:59 +03:00 
			
		
		
		
	Cleanup/remove backwards compat overwrite settings (#11888)
* Remove backwards-compatible overwrite from Entity Linker
  This also adds a docstring about overwrite, since it wasn't present.
* Fix docstring
* Remove backward compat settings in Morphologizer
  This also needed a docstring added. For this component it's less clear what the right overwrite settings are.
* Remove backward compat from sentencizer
  This was simple
* Remove backward compat from senter
  Another simple one
* Remove backward compat setting from tagger
* Add docstrings
* Update spacy/pipeline/morphologizer.pyx
  Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
* Update docs

---------

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
This commit is contained in:
		
							parent
							
								
									6920fb7baf
								
							
						
					
					
						commit
						89f974d4f5
					
				|  | @ -27,9 +27,6 @@ ActivationsT = Dict[str, Union[List[Ragged], List[str]]] | ||||||
| 
 | 
 | ||||||
| KNOWLEDGE_BASE_IDS = "kb_ids" | KNOWLEDGE_BASE_IDS = "kb_ids" | ||||||
| 
 | 
 | ||||||
| # See #9050 |  | ||||||
| BACKWARD_OVERWRITE = True |  | ||||||
| 
 |  | ||||||
| default_model_config = """ | default_model_config = """ | ||||||
| [model] | [model] | ||||||
| @architectures = "spacy.EntityLinker.v2" | @architectures = "spacy.EntityLinker.v2" | ||||||
|  | @ -60,7 +57,7 @@ DEFAULT_NEL_MODEL = Config().from_str(default_model_config)["model"] | ||||||
|         "entity_vector_length": 64, |         "entity_vector_length": 64, | ||||||
|         "get_candidates": {"@misc": "spacy.CandidateGenerator.v1"}, |         "get_candidates": {"@misc": "spacy.CandidateGenerator.v1"}, | ||||||
|         "get_candidates_batch": {"@misc": "spacy.CandidateBatchGenerator.v1"}, |         "get_candidates_batch": {"@misc": "spacy.CandidateBatchGenerator.v1"}, | ||||||
|         "overwrite": True, |         "overwrite": False, | ||||||
|         "scorer": {"@scorers": "spacy.entity_linker_scorer.v1"}, |         "scorer": {"@scorers": "spacy.entity_linker_scorer.v1"}, | ||||||
|         "use_gold_ents": True, |         "use_gold_ents": True, | ||||||
|         "candidates_batch_size": 1, |         "candidates_batch_size": 1, | ||||||
|  | @ -191,7 +188,7 @@ class EntityLinker(TrainablePipe): | ||||||
|         get_candidates_batch: Callable[ |         get_candidates_batch: Callable[ | ||||||
|             [KnowledgeBase, Iterable[Span]], Iterable[Iterable[Candidate]] |             [KnowledgeBase, Iterable[Span]], Iterable[Iterable[Candidate]] | ||||||
|         ], |         ], | ||||||
|         overwrite: bool = BACKWARD_OVERWRITE, |         overwrite: bool = False, | ||||||
|         scorer: Optional[Callable] = entity_linker_score, |         scorer: Optional[Callable] = entity_linker_score, | ||||||
|         use_gold_ents: bool, |         use_gold_ents: bool, | ||||||
|         candidates_batch_size: int, |         candidates_batch_size: int, | ||||||
|  | @ -215,6 +212,7 @@ class EntityLinker(TrainablePipe): | ||||||
|             Callable[[KnowledgeBase, Iterable[Span]], Iterable[Iterable[Candidate]]], |             Callable[[KnowledgeBase, Iterable[Span]], Iterable[Iterable[Candidate]]], | ||||||
|             Iterable[Candidate]] |             Iterable[Candidate]] | ||||||
|             ): Function that produces a list of candidates, given a certain knowledge base and several textual mentions. |             ): Function that produces a list of candidates, given a certain knowledge base and several textual mentions. | ||||||
|  |         overwrite (bool): Whether to overwrite existing non-empty annotations. | ||||||
|         scorer (Optional[Callable]): The scoring method. Defaults to Scorer.score_links. |         scorer (Optional[Callable]): The scoring method. Defaults to Scorer.score_links. | ||||||
|         use_gold_ents (bool): Whether to copy entities from gold docs or not. If false, another |         use_gold_ents (bool): Whether to copy entities from gold docs or not. If false, another | ||||||
|             component must provide entity annotations. |             component must provide entity annotations. | ||||||
|  |  | ||||||
|  | @ -21,10 +21,6 @@ from ..scorer import Scorer | ||||||
| from ..training import validate_examples, validate_get_examples | from ..training import validate_examples, validate_get_examples | ||||||
| from ..util import registry | from ..util import registry | ||||||
| 
 | 
 | ||||||
| # See #9050 |  | ||||||
| BACKWARD_OVERWRITE = True |  | ||||||
| BACKWARD_EXTEND = False |  | ||||||
| 
 |  | ||||||
| default_model_config = """ | default_model_config = """ | ||||||
| [model] | [model] | ||||||
| @architectures = "spacy.Tagger.v2" | @architectures = "spacy.Tagger.v2" | ||||||
|  | @ -102,8 +98,8 @@ class Morphologizer(Tagger): | ||||||
|         model: Model, |         model: Model, | ||||||
|         name: str = "morphologizer", |         name: str = "morphologizer", | ||||||
|         *, |         *, | ||||||
|         overwrite: bool = BACKWARD_OVERWRITE, |         overwrite: bool = False, | ||||||
|         extend: bool = BACKWARD_EXTEND, |         extend: bool = False, | ||||||
|         scorer: Optional[Callable] = morphologizer_score, |         scorer: Optional[Callable] = morphologizer_score, | ||||||
|         save_activations: bool = False, |         save_activations: bool = False, | ||||||
|     ): |     ): | ||||||
|  | @ -113,6 +109,8 @@ class Morphologizer(Tagger): | ||||||
|         model (thinc.api.Model): The Thinc Model powering the pipeline component. |         model (thinc.api.Model): The Thinc Model powering the pipeline component. | ||||||
|         name (str): The component instance name, used to add entries to the |         name (str): The component instance name, used to add entries to the | ||||||
|             losses during training. |             losses during training. | ||||||
|  |         overwrite (bool): Whether to overwrite existing annotations. | ||||||
|  |         extend (bool): Whether to extend existing annotations. | ||||||
|         scorer (Optional[Callable]): The scoring method. Defaults to |         scorer (Optional[Callable]): The scoring method. Defaults to | ||||||
|             Scorer.score_token_attr for the attributes "pos" and "morph" and |             Scorer.score_token_attr for the attributes "pos" and "morph" and | ||||||
|             Scorer.score_token_attr_per_feat for the attribute "morph". |             Scorer.score_token_attr_per_feat for the attribute "morph". | ||||||
|  |  | ||||||
|  | @ -10,9 +10,6 @@ from ..language import Language | ||||||
| from ..scorer import Scorer | from ..scorer import Scorer | ||||||
| from .. import util | from .. import util | ||||||
| 
 | 
 | ||||||
| # see #9050 |  | ||||||
| BACKWARD_OVERWRITE = False |  | ||||||
| 
 |  | ||||||
| @Language.factory( | @Language.factory( | ||||||
|     "sentencizer", |     "sentencizer", | ||||||
|     assigns=["token.is_sent_start", "doc.sents"], |     assigns=["token.is_sent_start", "doc.sents"], | ||||||
|  | @ -52,13 +49,14 @@ class Sentencizer(Pipe): | ||||||
|         name="sentencizer", |         name="sentencizer", | ||||||
|         *, |         *, | ||||||
|         punct_chars=None, |         punct_chars=None, | ||||||
|         overwrite=BACKWARD_OVERWRITE, |         overwrite=False, | ||||||
|         scorer=senter_score, |         scorer=senter_score, | ||||||
|     ): |     ): | ||||||
|         """Initialize the sentencizer. |         """Initialize the sentencizer. | ||||||
| 
 | 
 | ||||||
|         punct_chars (list): Punctuation characters to split on. Will be |         punct_chars (list): Punctuation characters to split on. Will be | ||||||
|             serialized with the nlp object. |             serialized with the nlp object. | ||||||
|  |         overwrite (bool): Whether to overwrite existing annotations. | ||||||
|         scorer (Optional[Callable]): The scoring method. Defaults to |         scorer (Optional[Callable]): The scoring method. Defaults to | ||||||
|             Scorer.score_spans for the attribute "sents". |             Scorer.score_spans for the attribute "sents". | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -18,8 +18,6 @@ from ..training import validate_examples, validate_get_examples | ||||||
| from ..util import registry | from ..util import registry | ||||||
| from .. import util | from .. import util | ||||||
| 
 | 
 | ||||||
| # See #9050 |  | ||||||
| BACKWARD_OVERWRITE = False |  | ||||||
| 
 | 
 | ||||||
| default_model_config = """ | default_model_config = """ | ||||||
| [model] | [model] | ||||||
|  | @ -83,7 +81,7 @@ class SentenceRecognizer(Tagger): | ||||||
|         model, |         model, | ||||||
|         name="senter", |         name="senter", | ||||||
|         *, |         *, | ||||||
|         overwrite=BACKWARD_OVERWRITE, |         overwrite=False, | ||||||
|         scorer=senter_score, |         scorer=senter_score, | ||||||
|         save_activations: bool = False, |         save_activations: bool = False, | ||||||
|     ): |     ): | ||||||
|  | @ -93,6 +91,7 @@ class SentenceRecognizer(Tagger): | ||||||
|         model (thinc.api.Model): The Thinc Model powering the pipeline component. |         model (thinc.api.Model): The Thinc Model powering the pipeline component. | ||||||
|         name (str): The component instance name, used to add entries to the |         name (str): The component instance name, used to add entries to the | ||||||
|             losses during training. |             losses during training. | ||||||
|  |         overwrite (bool): Whether to overwrite existing annotations. | ||||||
|         scorer (Optional[Callable]): The scoring method. Defaults to |         scorer (Optional[Callable]): The scoring method. Defaults to | ||||||
|             Scorer.score_spans for the attribute "sents". |             Scorer.score_spans for the attribute "sents". | ||||||
|         save_activations (bool): save model activations in Doc when annotating. |         save_activations (bool): save model activations in Doc when annotating. | ||||||
|  |  | ||||||
|  | @ -27,9 +27,6 @@ from .. import util | ||||||
| 
 | 
 | ||||||
| ActivationsT = Dict[str, Union[List[Floats2d], List[Ints1d]]] | ActivationsT = Dict[str, Union[List[Floats2d], List[Ints1d]]] | ||||||
| 
 | 
 | ||||||
| # See #9050 |  | ||||||
| BACKWARD_OVERWRITE = False |  | ||||||
| 
 |  | ||||||
| default_model_config = """ | default_model_config = """ | ||||||
| [model] | [model] | ||||||
| @architectures = "spacy.Tagger.v2" | @architectures = "spacy.Tagger.v2" | ||||||
|  | @ -99,7 +96,7 @@ class Tagger(TrainablePipe): | ||||||
|         model, |         model, | ||||||
|         name="tagger", |         name="tagger", | ||||||
|         *, |         *, | ||||||
|         overwrite=BACKWARD_OVERWRITE, |         overwrite=False, | ||||||
|         scorer=tagger_score, |         scorer=tagger_score, | ||||||
|         neg_prefix="!", |         neg_prefix="!", | ||||||
|         save_activations: bool = False, |         save_activations: bool = False, | ||||||
|  | @ -110,6 +107,7 @@ class Tagger(TrainablePipe): | ||||||
|         model (thinc.api.Model): The Thinc Model powering the pipeline component. |         model (thinc.api.Model): The Thinc Model powering the pipeline component. | ||||||
|         name (str): The component instance name, used to add entries to the |         name (str): The component instance name, used to add entries to the | ||||||
|             losses during training. |             losses during training. | ||||||
|  |         overwrite (bool): Whether to overwrite existing annotations. | ||||||
|         scorer (Optional[Callable]): The scoring method. Defaults to |         scorer (Optional[Callable]): The scoring method. Defaults to | ||||||
|             Scorer.score_token_attr for the attribute "tag". |             Scorer.score_token_attr for the attribute "tag". | ||||||
|         save_activations (bool): save model activations in Doc when annotating. |         save_activations (bool): save model activations in Doc when annotating. | ||||||
|  |  | ||||||
|  | @ -63,7 +63,7 @@ architectures and their arguments and hyperparameters. | ||||||
| | `entity_vector_length`                          | Size of encoding vectors in the KB. Defaults to `64`. ~~int~~                                                                                                                                                                                                                               | | | `entity_vector_length`                          | Size of encoding vectors in the KB. Defaults to `64`. ~~int~~                                                                                                                                                                                                                               | | ||||||
| | `use_gold_ents`                                 | Whether to copy entities from the gold docs or not. Defaults to `True`. If `False`, entities must be set in the training data or by an annotating component in the pipeline. ~~bool~~                                                                                                       | | | `use_gold_ents`                                 | Whether to copy entities from the gold docs or not. Defaults to `True`. If `False`, entities must be set in the training data or by an annotating component in the pipeline. ~~bool~~                                                                                                       | | ||||||
| | `get_candidates`                                | Function that generates plausible candidates for a given `Span` object. Defaults to [CandidateGenerator](/api/architectures#CandidateGenerator), a function looking up exact, case-dependent aliases in the KB. ~~Callable[[KnowledgeBase, Span], Iterable[Candidate]]~~                    | | | `get_candidates`                                | Function that generates plausible candidates for a given `Span` object. Defaults to [CandidateGenerator](/api/architectures#CandidateGenerator), a function looking up exact, case-dependent aliases in the KB. ~~Callable[[KnowledgeBase, Span], Iterable[Candidate]]~~                    | | ||||||
| | `overwrite` <Tag variant="new">3.2</Tag>        | Whether existing annotation is overwritten. Defaults to `True`. ~~bool~~                                                                                                                                                                                                                    | | | `overwrite` <Tag variant="new">3.2</Tag>        | Whether existing annotation is overwritten. Defaults to `False`. ~~bool~~                                                                                                                                                                                                                   | | ||||||
| | `scorer` <Tag variant="new">3.2</Tag>           | The scoring method. Defaults to [`Scorer.score_links`](/api/scorer#score_links). ~~Optional[Callable]~~                                                                                                                                                                                     | | | `scorer` <Tag variant="new">3.2</Tag>           | The scoring method. Defaults to [`Scorer.score_links`](/api/scorer#score_links). ~~Optional[Callable]~~                                                                                                                                                                                     | | ||||||
| | `save_activations` <Tag variant="new">4.0</Tag> | Save activations in `Doc` when annotating. Saved activations are `"ents"` and `"scores"`. ~~Union[bool, list[str]]~~                                                                                                                                                                        | | | `save_activations` <Tag variant="new">4.0</Tag> | Save activations in `Doc` when annotating. Saved activations are `"ents"` and `"scores"`. ~~Union[bool, list[str]]~~                                                                                                                                                                        | | ||||||
| | `threshold` <Tag variant="new">3.4</Tag>        | Confidence threshold for entity predictions. The default of `None` implies that all predictions are accepted, otherwise those with a score beneath the threshold are discarded. If there are no predictions with scores above the threshold, the linked entity is `NIL`. ~~Optional[float]~~ | | | `threshold` <Tag variant="new">3.4</Tag>        | Confidence threshold for entity predictions. The default of `None` implies that all predictions are accepted, otherwise those with a score beneath the threshold are discarded. If there are no predictions with scores above the threshold, the linked entity is `NIL`. ~~Optional[float]~~ | | ||||||
|  |  | ||||||
|  | @ -45,7 +45,7 @@ architectures and their arguments and hyperparameters. | ||||||
| | Setting                                         | Description                                                                                                                                                                                                                                                            | | | Setting                                         | Description                                                                                                                                                                                                                                                            | | ||||||
| | ----------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | | ----------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ||||||
| | `model`                                         | The model to use. Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~                                                                                                                                                                | | | `model`                                         | The model to use. Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~                                                                                                                                                                | | ||||||
| | `overwrite` <Tag variant="new">3.2</Tag>        | Whether the values of existing features are overwritten. Defaults to `True`. ~~bool~~                                                                                                                                                                                  | | | `overwrite` <Tag variant="new">3.2</Tag>        | Whether the values of existing features are overwritten. Defaults to `False`. ~~bool~~                                                                                                                                                                                 | | ||||||
| | `extend` <Tag variant="new">3.2</Tag>           | Whether existing feature types (whose values may or may not be overwritten depending on `overwrite`) are preserved. Defaults to `False`. ~~bool~~                                                                                                                      | | | `extend` <Tag variant="new">3.2</Tag>           | Whether existing feature types (whose values may or may not be overwritten depending on `overwrite`) are preserved. Defaults to `False`. ~~bool~~                                                                                                                      | | ||||||
| | `scorer` <Tag variant="new">3.2</Tag>           | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"pos"` and `"morph"` and [`Scorer.score_token_attr_per_feat`](/api/scorer#score_token_attr_per_feat) for the attribute `"morph"`. ~~Optional[Callable]~~ | | | `scorer` <Tag variant="new">3.2</Tag>           | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"pos"` and `"morph"` and [`Scorer.score_token_attr_per_feat`](/api/scorer#score_token_attr_per_feat) for the attribute `"morph"`. ~~Optional[Callable]~~ | | ||||||
| | `save_activations` <Tag variant="new">4.0</Tag> | Save activations in `Doc` when annotating. Saved activations are `"probabilities"` and `"label_ids"`. ~~Union[bool, list[str]]~~                                                                                                                                       | | | `save_activations` <Tag variant="new">4.0</Tag> | Save activations in `Doc` when annotating. Saved activations are `"probabilities"` and `"label_ids"`. ~~Union[bool, list[str]]~~                                                                                                                                       | | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user