mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-14 03:26:24 +03:00
Cleanup/remove backwards compat overwrite settings (#11888)
* Remove backwards-compatible overwrite from Entity Linker This also adds a docstring about overwrite, since it wasn't present. * Fix docstring * Remove backward compat settings in Morphologizer This also needed a docstring added. For this component it's less clear what the right overwrite settings are. * Remove backward compat from sentencizer This was simple * Remove backward compat from senter Another simple one * Remove backward compat setting from tagger * Add docstrings * Update spacy/pipeline/morphologizer.pyx Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com> * Update docs --------- Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
This commit is contained in:
parent
6920fb7baf
commit
89f974d4f5
|
@ -27,9 +27,6 @@ ActivationsT = Dict[str, Union[List[Ragged], List[str]]]
|
||||||
|
|
||||||
KNOWLEDGE_BASE_IDS = "kb_ids"
|
KNOWLEDGE_BASE_IDS = "kb_ids"
|
||||||
|
|
||||||
# See #9050
|
|
||||||
BACKWARD_OVERWRITE = True
|
|
||||||
|
|
||||||
default_model_config = """
|
default_model_config = """
|
||||||
[model]
|
[model]
|
||||||
@architectures = "spacy.EntityLinker.v2"
|
@architectures = "spacy.EntityLinker.v2"
|
||||||
|
@ -60,7 +57,7 @@ DEFAULT_NEL_MODEL = Config().from_str(default_model_config)["model"]
|
||||||
"entity_vector_length": 64,
|
"entity_vector_length": 64,
|
||||||
"get_candidates": {"@misc": "spacy.CandidateGenerator.v1"},
|
"get_candidates": {"@misc": "spacy.CandidateGenerator.v1"},
|
||||||
"get_candidates_batch": {"@misc": "spacy.CandidateBatchGenerator.v1"},
|
"get_candidates_batch": {"@misc": "spacy.CandidateBatchGenerator.v1"},
|
||||||
"overwrite": True,
|
"overwrite": False,
|
||||||
"scorer": {"@scorers": "spacy.entity_linker_scorer.v1"},
|
"scorer": {"@scorers": "spacy.entity_linker_scorer.v1"},
|
||||||
"use_gold_ents": True,
|
"use_gold_ents": True,
|
||||||
"candidates_batch_size": 1,
|
"candidates_batch_size": 1,
|
||||||
|
@ -191,7 +188,7 @@ class EntityLinker(TrainablePipe):
|
||||||
get_candidates_batch: Callable[
|
get_candidates_batch: Callable[
|
||||||
[KnowledgeBase, Iterable[Span]], Iterable[Iterable[Candidate]]
|
[KnowledgeBase, Iterable[Span]], Iterable[Iterable[Candidate]]
|
||||||
],
|
],
|
||||||
overwrite: bool = BACKWARD_OVERWRITE,
|
overwrite: bool = False,
|
||||||
scorer: Optional[Callable] = entity_linker_score,
|
scorer: Optional[Callable] = entity_linker_score,
|
||||||
use_gold_ents: bool,
|
use_gold_ents: bool,
|
||||||
candidates_batch_size: int,
|
candidates_batch_size: int,
|
||||||
|
@ -215,6 +212,7 @@ class EntityLinker(TrainablePipe):
|
||||||
Callable[[KnowledgeBase, Iterable[Span]], Iterable[Iterable[Candidate]]],
|
Callable[[KnowledgeBase, Iterable[Span]], Iterable[Iterable[Candidate]]],
|
||||||
Iterable[Candidate]]
|
Iterable[Candidate]]
|
||||||
): Function that produces a list of candidates, given a certain knowledge base and several textual mentions.
|
): Function that produces a list of candidates, given a certain knowledge base and several textual mentions.
|
||||||
|
overwrite (bool): Whether to overwrite existing non-empty annotations.
|
||||||
scorer (Optional[Callable]): The scoring method. Defaults to Scorer.score_links.
|
scorer (Optional[Callable]): The scoring method. Defaults to Scorer.score_links.
|
||||||
use_gold_ents (bool): Whether to copy entities from gold docs or not. If false, another
|
use_gold_ents (bool): Whether to copy entities from gold docs or not. If false, another
|
||||||
component must provide entity annotations.
|
component must provide entity annotations.
|
||||||
|
|
|
@ -21,10 +21,6 @@ from ..scorer import Scorer
|
||||||
from ..training import validate_examples, validate_get_examples
|
from ..training import validate_examples, validate_get_examples
|
||||||
from ..util import registry
|
from ..util import registry
|
||||||
|
|
||||||
# See #9050
|
|
||||||
BACKWARD_OVERWRITE = True
|
|
||||||
BACKWARD_EXTEND = False
|
|
||||||
|
|
||||||
default_model_config = """
|
default_model_config = """
|
||||||
[model]
|
[model]
|
||||||
@architectures = "spacy.Tagger.v2"
|
@architectures = "spacy.Tagger.v2"
|
||||||
|
@ -102,8 +98,8 @@ class Morphologizer(Tagger):
|
||||||
model: Model,
|
model: Model,
|
||||||
name: str = "morphologizer",
|
name: str = "morphologizer",
|
||||||
*,
|
*,
|
||||||
overwrite: bool = BACKWARD_OVERWRITE,
|
overwrite: bool = False,
|
||||||
extend: bool = BACKWARD_EXTEND,
|
extend: bool = False,
|
||||||
scorer: Optional[Callable] = morphologizer_score,
|
scorer: Optional[Callable] = morphologizer_score,
|
||||||
save_activations: bool = False,
|
save_activations: bool = False,
|
||||||
):
|
):
|
||||||
|
@ -113,6 +109,8 @@ class Morphologizer(Tagger):
|
||||||
model (thinc.api.Model): The Thinc Model powering the pipeline component.
|
model (thinc.api.Model): The Thinc Model powering the pipeline component.
|
||||||
name (str): The component instance name, used to add entries to the
|
name (str): The component instance name, used to add entries to the
|
||||||
losses during training.
|
losses during training.
|
||||||
|
overwrite (bool): Whether to overwrite existing annotations.
|
||||||
|
extend (bool): Whether to extend existing annotations.
|
||||||
scorer (Optional[Callable]): The scoring method. Defaults to
|
scorer (Optional[Callable]): The scoring method. Defaults to
|
||||||
Scorer.score_token_attr for the attributes "pos" and "morph" and
|
Scorer.score_token_attr for the attributes "pos" and "morph" and
|
||||||
Scorer.score_token_attr_per_feat for the attribute "morph".
|
Scorer.score_token_attr_per_feat for the attribute "morph".
|
||||||
|
|
|
@ -10,9 +10,6 @@ from ..language import Language
|
||||||
from ..scorer import Scorer
|
from ..scorer import Scorer
|
||||||
from .. import util
|
from .. import util
|
||||||
|
|
||||||
# see #9050
|
|
||||||
BACKWARD_OVERWRITE = False
|
|
||||||
|
|
||||||
@Language.factory(
|
@Language.factory(
|
||||||
"sentencizer",
|
"sentencizer",
|
||||||
assigns=["token.is_sent_start", "doc.sents"],
|
assigns=["token.is_sent_start", "doc.sents"],
|
||||||
|
@ -52,13 +49,14 @@ class Sentencizer(Pipe):
|
||||||
name="sentencizer",
|
name="sentencizer",
|
||||||
*,
|
*,
|
||||||
punct_chars=None,
|
punct_chars=None,
|
||||||
overwrite=BACKWARD_OVERWRITE,
|
overwrite=False,
|
||||||
scorer=senter_score,
|
scorer=senter_score,
|
||||||
):
|
):
|
||||||
"""Initialize the sentencizer.
|
"""Initialize the sentencizer.
|
||||||
|
|
||||||
punct_chars (list): Punctuation characters to split on. Will be
|
punct_chars (list): Punctuation characters to split on. Will be
|
||||||
serialized with the nlp object.
|
serialized with the nlp object.
|
||||||
|
overwrite (bool): Whether to overwrite existing annotations.
|
||||||
scorer (Optional[Callable]): The scoring method. Defaults to
|
scorer (Optional[Callable]): The scoring method. Defaults to
|
||||||
Scorer.score_spans for the attribute "sents".
|
Scorer.score_spans for the attribute "sents".
|
||||||
|
|
||||||
|
|
|
@ -18,8 +18,6 @@ from ..training import validate_examples, validate_get_examples
|
||||||
from ..util import registry
|
from ..util import registry
|
||||||
from .. import util
|
from .. import util
|
||||||
|
|
||||||
# See #9050
|
|
||||||
BACKWARD_OVERWRITE = False
|
|
||||||
|
|
||||||
default_model_config = """
|
default_model_config = """
|
||||||
[model]
|
[model]
|
||||||
|
@ -83,7 +81,7 @@ class SentenceRecognizer(Tagger):
|
||||||
model,
|
model,
|
||||||
name="senter",
|
name="senter",
|
||||||
*,
|
*,
|
||||||
overwrite=BACKWARD_OVERWRITE,
|
overwrite=False,
|
||||||
scorer=senter_score,
|
scorer=senter_score,
|
||||||
save_activations: bool = False,
|
save_activations: bool = False,
|
||||||
):
|
):
|
||||||
|
@ -93,6 +91,7 @@ class SentenceRecognizer(Tagger):
|
||||||
model (thinc.api.Model): The Thinc Model powering the pipeline component.
|
model (thinc.api.Model): The Thinc Model powering the pipeline component.
|
||||||
name (str): The component instance name, used to add entries to the
|
name (str): The component instance name, used to add entries to the
|
||||||
losses during training.
|
losses during training.
|
||||||
|
overwrite (bool): Whether to overwrite existing annotations.
|
||||||
scorer (Optional[Callable]): The scoring method. Defaults to
|
scorer (Optional[Callable]): The scoring method. Defaults to
|
||||||
Scorer.score_spans for the attribute "sents".
|
Scorer.score_spans for the attribute "sents".
|
||||||
save_activations (bool): save model activations in Doc when annotating.
|
save_activations (bool): save model activations in Doc when annotating.
|
||||||
|
|
|
@ -27,9 +27,6 @@ from .. import util
|
||||||
|
|
||||||
ActivationsT = Dict[str, Union[List[Floats2d], List[Ints1d]]]
|
ActivationsT = Dict[str, Union[List[Floats2d], List[Ints1d]]]
|
||||||
|
|
||||||
# See #9050
|
|
||||||
BACKWARD_OVERWRITE = False
|
|
||||||
|
|
||||||
default_model_config = """
|
default_model_config = """
|
||||||
[model]
|
[model]
|
||||||
@architectures = "spacy.Tagger.v2"
|
@architectures = "spacy.Tagger.v2"
|
||||||
|
@ -99,7 +96,7 @@ class Tagger(TrainablePipe):
|
||||||
model,
|
model,
|
||||||
name="tagger",
|
name="tagger",
|
||||||
*,
|
*,
|
||||||
overwrite=BACKWARD_OVERWRITE,
|
overwrite=False,
|
||||||
scorer=tagger_score,
|
scorer=tagger_score,
|
||||||
neg_prefix="!",
|
neg_prefix="!",
|
||||||
save_activations: bool = False,
|
save_activations: bool = False,
|
||||||
|
@ -110,6 +107,7 @@ class Tagger(TrainablePipe):
|
||||||
model (thinc.api.Model): The Thinc Model powering the pipeline component.
|
model (thinc.api.Model): The Thinc Model powering the pipeline component.
|
||||||
name (str): The component instance name, used to add entries to the
|
name (str): The component instance name, used to add entries to the
|
||||||
losses during training.
|
losses during training.
|
||||||
|
overwrite (bool): Whether to overwrite existing annotations.
|
||||||
scorer (Optional[Callable]): The scoring method. Defaults to
|
scorer (Optional[Callable]): The scoring method. Defaults to
|
||||||
Scorer.score_token_attr for the attribute "tag".
|
Scorer.score_token_attr for the attribute "tag".
|
||||||
save_activations (bool): save model activations in Doc when annotating.
|
save_activations (bool): save model activations in Doc when annotating.
|
||||||
|
|
|
@ -63,7 +63,7 @@ architectures and their arguments and hyperparameters.
|
||||||
| `entity_vector_length` | Size of encoding vectors in the KB. Defaults to `64`. ~~int~~ |
|
| `entity_vector_length` | Size of encoding vectors in the KB. Defaults to `64`. ~~int~~ |
|
||||||
| `use_gold_ents` | Whether to copy entities from the gold docs or not. Defaults to `True`. If `False`, entities must be set in the training data or by an annotating component in the pipeline. ~~int~~ |
|
| `use_gold_ents` | Whether to copy entities from the gold docs or not. Defaults to `True`. If `False`, entities must be set in the training data or by an annotating component in the pipeline. ~~int~~ |
|
||||||
| `get_candidates` | Function that generates plausible candidates for a given `Span` object. Defaults to [CandidateGenerator](/api/architectures#CandidateGenerator), a function looking up exact, case-dependent aliases in the KB. ~~Callable[[KnowledgeBase, Span], Iterable[Candidate]]~~ |
|
| `get_candidates` | Function that generates plausible candidates for a given `Span` object. Defaults to [CandidateGenerator](/api/architectures#CandidateGenerator), a function looking up exact, case-dependent aliases in the KB. ~~Callable[[KnowledgeBase, Span], Iterable[Candidate]]~~ |
|
||||||
| `overwrite` <Tag variant="new">3.2</Tag> | Whether existing annotation is overwritten. Defaults to `True`. ~~bool~~ |
|
| `overwrite` <Tag variant="new">3.2</Tag> | Whether existing annotation is overwritten. Defaults to `False`. ~~bool~~ |
|
||||||
| `scorer` <Tag variant="new">3.2</Tag> | The scoring method. Defaults to [`Scorer.score_links`](/api/scorer#score_links). ~~Optional[Callable]~~ |
|
| `scorer` <Tag variant="new">3.2</Tag> | The scoring method. Defaults to [`Scorer.score_links`](/api/scorer#score_links). ~~Optional[Callable]~~ |
|
||||||
| `save_activations` <Tag variant="new">4.0</Tag> | Save activations in `Doc` when annotating. Saved activations are `"ents"` and `"scores"`. ~~Union[bool, list[str]]~~ |
|
| `save_activations` <Tag variant="new">4.0</Tag> | Save activations in `Doc` when annotating. Saved activations are `"ents"` and `"scores"`. ~~Union[bool, list[str]]~~ |
|
||||||
| `threshold` <Tag variant="new">3.4</Tag> | Confidence threshold for entity predictions. The default of `None` implies that all predictions are accepted, otherwise those with a score beneath the treshold are discarded. If there are no predictions with scores above the threshold, the linked entity is `NIL`. ~~Optional[float]~~ |
|
| `threshold` <Tag variant="new">3.4</Tag> | Confidence threshold for entity predictions. The default of `None` implies that all predictions are accepted, otherwise those with a score beneath the treshold are discarded. If there are no predictions with scores above the threshold, the linked entity is `NIL`. ~~Optional[float]~~ |
|
||||||
|
|
|
@ -45,7 +45,7 @@ architectures and their arguments and hyperparameters.
|
||||||
| Setting | Description |
|
| Setting | Description |
|
||||||
| ----------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
| ----------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| `model` | The model to use. Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~ |
|
| `model` | The model to use. Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~ |
|
||||||
| `overwrite` <Tag variant="new">3.2</Tag> | Whether the values of existing features are overwritten. Defaults to `True`. ~~bool~~ |
|
| `overwrite` <Tag variant="new">3.2</Tag> | Whether the values of existing features are overwritten. Defaults to `False`. ~~bool~~ |
|
||||||
| `extend` <Tag variant="new">3.2</Tag> | Whether existing feature types (whose values may or may not be overwritten depending on `overwrite`) are preserved. Defaults to `False`. ~~bool~~ |
|
| `extend` <Tag variant="new">3.2</Tag> | Whether existing feature types (whose values may or may not be overwritten depending on `overwrite`) are preserved. Defaults to `False`. ~~bool~~ |
|
||||||
| `scorer` <Tag variant="new">3.2</Tag> | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"pos"` and `"morph"` and [`Scorer.score_token_attr_per_feat`](/api/scorer#score_token_attr_per_feat) for the attribute `"morph"`. ~~Optional[Callable]~~ |
|
| `scorer` <Tag variant="new">3.2</Tag> | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"pos"` and `"morph"` and [`Scorer.score_token_attr_per_feat`](/api/scorer#score_token_attr_per_feat) for the attribute `"morph"`. ~~Optional[Callable]~~ |
|
||||||
| `save_activations` <Tag variant="new">4.0</Tag> | Save activations in `Doc` when annotating. Saved activations are `"probabilities"` and `"label_ids"`. ~~Union[bool, list[str]]~~ |
|
| `save_activations` <Tag variant="new">4.0</Tag> | Save activations in `Doc` when annotating. Saved activations are `"probabilities"` and `"label_ids"`. ~~Union[bool, list[str]]~~ |
|
||||||
|
|
Loading…
Reference in New Issue
Block a user