Document scorers in registry and components from #8766 (#8929)

* Document scorers in registry and components from #8766

* Update spacy/pipeline/lemmatizer.py

Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>

* Update website/docs/api/dependencyparser.md

Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>

* Reformat

Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
This commit is contained in:
Adriane Boyd 2021-08-12 12:50:03 +02:00 committed by GitHub
parent 944ad6b1d4
commit b278f31ee6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
28 changed files with 121 additions and 171 deletions

View File

@ -36,9 +36,7 @@ def make_attribute_ruler(
return AttributeRuler(nlp.vocab, name, validate=validate, scorer=scorer)
def attribute_ruler_score(
examples: Iterable[Example], **kwargs
) -> Dict[str, Any]:
def attribute_ruler_score(examples: Iterable[Example], **kwargs) -> Dict[str, Any]:
def morph_key_getter(token, attr):
return getattr(token, attr).key
@ -84,6 +82,10 @@ class AttributeRuler(Pipe):
vocab (Vocab): The vocab.
name (str): The pipe name. Defaults to "attribute_ruler".
scorer (Optional[Callable]): The scoring method. Defaults to
Scorer.score_token_attr for the attributes "tag", "pos", "morph" and
"lemma" and Scorer.score_token_attr_per_feat for the attribute
"morph".
RETURNS (AttributeRuler): The AttributeRuler component.

View File

@ -102,6 +102,7 @@ def make_parser(
primarily affects the label accuracy, it can also affect the attachment
structure, as the labels are used to represent the pseudo-projectivity
transformation.
scorer (Optional[Callable]): The scoring method.
"""
return DependencyParser(
nlp.vocab,

View File

@ -83,6 +83,7 @@ def make_entity_linker(
entity_vector_length (int): Size of encoding vectors in the KB.
get_candidates (Callable[[KnowledgeBase, "Span"], Iterable[Candidate]]): Function that
produces a list of candidates, given a certain knowledge base and a textual mention.
scorer (Optional[Callable]): The scoring method.
"""
return EntityLinker(
nlp.vocab,
@ -142,6 +143,8 @@ class EntityLinker(TrainablePipe):
entity_vector_length (int): Size of encoding vectors in the KB.
get_candidates (Callable[[KnowledgeBase, Span], Iterable[Candidate]]): Function that
produces a list of candidates, given a certain knowledge base and a textual mention.
scorer (Optional[Callable]): The scoring method. Defaults to
Scorer.score_links.
DOCS: https://spacy.io/api/entitylinker#init
"""

View File

@ -106,6 +106,8 @@ class EntityRuler(Pipe):
overwrite_ents (bool): If existing entities are present, e.g. entities
added by the model, overwrite them by matches if necessary.
ent_id_sep (str): Separator used internally for entity IDs.
scorer (Optional[Callable]): The scoring method. Defaults to
spacy.scorer.get_ner_prf.
DOCS: https://spacy.io/api/entityruler#init
"""

View File

@ -90,6 +90,8 @@ class Lemmatizer(Pipe):
mode (str): The lemmatizer mode: "lookup", "rule". Defaults to "lookup".
overwrite (bool): Whether to overwrite existing lemmas. Defaults to
`False`.
scorer (Optional[Callable]): The scoring method. Defaults to
Scorer.score_token_attr for the attribute "lemma".
DOCS: https://spacy.io/api/lemmatizer#init
"""

View File

@ -95,6 +95,9 @@ class Morphologizer(Tagger):
model (thinc.api.Model): The Thinc Model powering the pipeline component.
name (str): The component instance name, used to add entries to the
losses during training.
scorer (Optional[Callable]): The scoring method. Defaults to
Scorer.score_token_attr for the attributes "pos" and "morph" and
Scorer.score_token_attr_per_feat for the attribute "morph".
DOCS: https://spacy.io/api/morphologizer#init
"""

View File

@ -82,6 +82,7 @@ def make_ner(
incorrect_spans_key (Optional[str]): Identifies spans that are known
to be incorrect entity annotations. The incorrect entity annotations
can be stored in the span group, under this key.
scorer (Optional[Callable]): The scoring method.
"""
return EntityRecognizer(
nlp.vocab,
@ -158,6 +159,7 @@ def make_beam_ner(
and are faster to compute.
incorrect_spans_key (Optional[str]): Optional key into span groups of
entities known to be non-entities.
scorer (Optional[Callable]): The scoring method.
"""
return EntityRecognizer(
nlp.vocab,

View File

@ -55,7 +55,8 @@ class Sentencizer(Pipe):
punct_chars (list): Punctuation characters to split on. Will be
serialized with the nlp object.
RETURNS (Sentencizer): The sentencizer component.
scorer (Optional[Callable]): The scoring method. Defaults to
Scorer.score_spans for the attribute "sents".
DOCS: https://spacy.io/api/sentencizer#init
"""

View File

@ -69,6 +69,8 @@ class SentenceRecognizer(Tagger):
model (thinc.api.Model): The Thinc Model powering the pipeline component.
name (str): The component instance name, used to add entries to the
losses during training.
scorer (Optional[Callable]): The scoring method. Defaults to
Scorer.score_spans for the attribute "sents".
DOCS: https://spacy.io/api/sentencerecognizer#init
"""

View File

@ -181,6 +181,22 @@ class SpanCategorizer(TrainablePipe):
scorer: Optional[Callable] = spancat_score,
) -> None:
"""Initialize the span categorizer.
vocab (Vocab): The shared vocabulary.
model (thinc.api.Model): The Thinc Model powering the pipeline component.
name (str): The component instance name, used to add entries to the
losses during training.
spans_key (str): Key of the Doc.spans dict to save the spans under.
During initialization and training, the component will look for
spans on the reference document under the same key. Defaults to
`"spans"`.
threshold (float): Minimum probability to consider a prediction
positive. Spans with a positive prediction will be saved on the Doc.
Defaults to 0.5.
max_positive (Optional[int]): Maximum number of labels to consider
positive per span. Defaults to None, indicating no limit.
scorer (Optional[Callable]): The scoring method. Defaults to
Scorer.score_spans for the Doc.spans[spans_key] with overlapping
spans allowed.
DOCS: https://spacy.io/api/spancategorizer#init
"""

View File

@ -78,6 +78,8 @@ class Tagger(TrainablePipe):
model (thinc.api.Model): The Thinc Model powering the pipeline component.
name (str): The component instance name, used to add entries to the
losses during training.
scorer (Optional[Callable]): The scoring method. Defaults to
Scorer.score_token_attr for the attribute "tag".
DOCS: https://spacy.io/api/tagger#init
"""

View File

@ -104,6 +104,7 @@ def make_textcat(
model (Model[List[Doc], List[Floats2d]]): A model instance that predicts
scores for each category.
threshold (float): Cutoff to consider a prediction "positive".
scorer (Optional[Callable]): The scoring method.
"""
return TextCategorizer(nlp.vocab, model, name, threshold=threshold, scorer=scorer)
@ -144,6 +145,8 @@ class TextCategorizer(TrainablePipe):
name (str): The component instance name, used to add entries to the
losses during training.
threshold (float): Cutoff to consider a prediction "positive".
scorer (Optional[Callable]): The scoring method. Defaults to
Scorer.score_cats for the attribute "cats".
DOCS: https://spacy.io/api/textcategorizer#init
"""

View File

@ -87,6 +87,7 @@ cdef class Parser(TrainablePipe):
incorrect_spans_key (Optional[str]): Identifies spans that are known
to be incorrect entity annotations. The incorrect entity annotations
can be stored in the span group, under this key.
scorer (Optional[Callable]): The scoring method. Defaults to None.
"""
self.vocab = vocab
self.name = name

View File

@ -49,11 +49,12 @@ Initialize the attribute ruler.
> ```
| Name | Description |
| -------------- | ---------------------------------------------------------------------------------------------------------------------------------------- |
| -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| `vocab` | The shared vocabulary to pass to the matcher. ~~Vocab~~ |
| `name` | Instance name of the current pipeline component. Typically passed in automatically from the factory when the component is added. ~~str~~ |
| _keyword-only_ | |
| `validate` | Whether patterns should be validated (passed to the [`Matcher`](/api/matcher#init)). Defaults to `False`. ~~bool~~ |
| `scorer` | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"tag"`, `"pos"`, `"morph"` and `"lemma"` and [`Scorer.score_token_attr_per_feat`](/api/scorer#score_token_attr_per_feat) for the attribute `"morph"`. ~~Optional[Callable]~~ |
## AttributeRuler.\_\_call\_\_ {#call tag="method"}
@ -175,21 +176,6 @@ Load attribute ruler patterns from morph rules.
| ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `morph_rules` | The morph rules that map token text and fine-grained tags to coarse-grained tags, lemmas and morphological features. ~~Dict[str, Dict[str, Dict[Union[int, str], Union[int, str]]]]~~ |
## AttributeRuler.score {#score tag="method" new="3"}
Score a batch of examples.
> #### Example
>
> ```python
> scores = ruler.score(examples)
> ```
| Name | Description |
| ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `examples` | The examples to score. ~~Iterable[Example]~~ |
| **RETURNS** | The scores, produced by [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"tag"`, `"pos"`, `"morph"` and `"lemma"` if present in any of the target token attributes. ~~Dict[str, float]~~ |
## AttributeRuler.to_disk {#to_disk tag="method"}
Serialize the pipe to disk.

View File

@ -91,6 +91,7 @@ shortcut for this and instantiate the component using its string name and
| `update_with_oracle_cut_size` | During training, cut long sequences into shorter segments by creating intermediate states based on the gold-standard history. The model is not very sensitive to this parameter, so you usually won't need to change it. Defaults to `100`. ~~int~~ |
| `learn_tokens` | Whether to learn to merge subtokens that are split relative to the gold standard. Experimental. Defaults to `False`. ~~bool~~ |
| `min_action_freq` | The minimum frequency of labelled actions to retain. Rarer labelled actions have their label backed-off to "dep". While this primarily affects the label accuracy, it can also affect the attachment structure, as the labels are used to represent the pseudo-projectivity transformation. ~~int~~ |
| `scorer` | The scoring method. Defaults to [`Scorer.score_deps`](/api/scorer#score_deps) for the attribute `"dep"` ignoring the labels `p` and `punct` and [`Scorer.score_spans`](/api/scorer#score_spans) for the attribute `"sents"`. ~~Optional[Callable]~~ |
## DependencyParser.\_\_call\_\_ {#call tag="method"}
@ -259,21 +260,6 @@ predicted scores.
| `scores` | Scores representing the model's predictions. ~~StateClass~~ |
| **RETURNS** | The loss and the gradient, i.e. `(loss, gradient)`. ~~Tuple[float, float]~~ |
## DependencyParser.score {#score tag="method" new="3"}
Score a batch of examples.
> #### Example
>
> ```python
> scores = parser.score(examples)
> ```
| Name | Description |
| ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| `examples` | The examples to score. ~~Iterable[Example]~~ |
| **RETURNS** | The scores, produced by [`Scorer.score_spans`](/api/scorer#score_spans) and [`Scorer.score_deps`](/api/scorer#score_deps). ~~Dict[str, Union[float, Dict[str, float]]]~~ |
## DependencyParser.create_optimizer {#create_optimizer tag="method"}
Create an [`Optimizer`](https://thinc.ai/docs/api-optimizers) for the pipeline

View File

@ -50,6 +50,7 @@ architectures and their arguments and hyperparameters.
| `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. Defaults to [EntityLinker](/api/architectures#EntityLinker). ~~Model~~ |
| `entity_vector_length` | Size of encoding vectors in the KB. Defaults to `64`. ~~int~~ |
| `get_candidates` | Function that generates plausible candidates for a given `Span` object. Defaults to [CandidateGenerator](/api/architectures#CandidateGenerator), a function looking up exact, case-dependent aliases in the KB. ~~Callable[[KnowledgeBase, Span], Iterable[Candidate]]~~ |
| `scorer` | The scoring method. Defaults to [`Scorer.score_links`](/api/scorer#score_links). ~~Optional[Callable]~~ |
```python
%%GITHUB_SPACY/spacy/pipeline/entity_linker.py
@ -259,21 +260,6 @@ pipe's entity linking model and context encoder. Delegates to
| `losses` | Optional record of the loss during training. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
| **RETURNS** | The updated `losses` dictionary. ~~Dict[str, float]~~ |
## EntityLinker.score {#score tag="method" new="3"}
Score a batch of examples.
> #### Example
>
> ```python
> scores = entity_linker.score(examples)
> ```
| Name | Description |
| ----------- | ---------------------------------------------------------------------------------------------- |
| `examples` | The examples to score. ~~Iterable[Example]~~ |
| **RETURNS** | The scores, produced by [`Scorer.score_links`](/api/scorer#score_links) . ~~Dict[str, float]~~ |
## EntityLinker.create_optimizer {#create_optimizer tag="method"}
Create an optimizer for the pipeline component.

View File

@ -48,6 +48,7 @@ architectures and their arguments and hyperparameters.
| `update_with_oracle_cut_size` | During training, cut long sequences into shorter segments by creating intermediate states based on the gold-standard history. The model is not very sensitive to this parameter, so you usually won't need to change it. Defaults to `100`. ~~int~~ |
| `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. Defaults to [TransitionBasedParser](/api/architectures#TransitionBasedParser). ~~Model[List[Doc], List[Floats2d]]~~ |
| `incorrect_spans_key` | This key refers to a `SpanGroup` in `doc.spans` that specifies incorrect spans. The NER will learn not to predict (exactly) those spans. Defaults to `None`. ~~Optional[str]~~ |
| `scorer` | The scoring method. Defaults to [`spacy.scorer.get_ner_prf`](/api/scorer#get_ner_prf). ~~Optional[Callable]~~ |
```python
%%GITHUB_SPACY/spacy/pipeline/ner.pyx
@ -251,21 +252,6 @@ predicted scores.
| `scores` | Scores representing the model's predictions. ~~StateClass~~ |
| **RETURNS** | The loss and the gradient, i.e. `(loss, gradient)`. ~~Tuple[float, float]~~ |
## EntityRecognizer.score {#score tag="method" new="3"}
Score a batch of examples.
> #### Example
>
> ```python
> scores = ner.score(examples)
> ```
| Name | Description |
| ----------- | --------------------------------------------------------- |
| `examples` | The examples to score. ~~Iterable[Example]~~ |
| **RETURNS** | The scores. ~~Dict[str, Union[float, Dict[str, float]]]~~ |
## EntityRecognizer.create_optimizer {#create_optimizer tag="method"}
Create an optimizer for the pipeline component.

View File

@ -40,6 +40,7 @@ how the component should be configured. You can override its settings via the
| `validate` | Whether patterns should be validated (passed to the `Matcher` and `PhraseMatcher`). Defaults to `False`. ~~bool~~ |
| `overwrite_ents` | If existing entities are present, e.g. entities added by the model, overwrite them by matches if necessary. Defaults to `False`. ~~bool~~ |
| `ent_id_sep` | Separator used internally for entity IDs. Defaults to `"\|\|"`. ~~str~~ |
| `scorer` | The scoring method. Defaults to [`spacy.scorer.get_ner_prf`](/api/scorer#get_ner_prf). ~~Optional[Callable]~~ |
```python
%%GITHUB_SPACY/spacy/pipeline/entityruler.py

View File

@ -48,10 +48,12 @@ data format used by the lookup and rule-based lemmatizers, see
> ```
| Setting | Description |
| ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- |
| -------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `mode` | The lemmatizer mode, e.g. `"lookup"` or `"rule"`. Defaults to `lookup` if no language-specific lemmatizer is available (see the following table). ~~str~~ |
| `overwrite` | Whether to overwrite existing lemmas. Defaults to `False`. ~~bool~~ |
| `model` | **Not yet implemented:** the model to use. ~~Model~~ |
| _keyword-only_ | |
| `scorer` | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attribute `"lemma"`. ~~Optional[Callable]~~ |
Many languages specify a default lemmatizer mode other than `lookup` if a better
lemmatizer is available. The lemmatizer modes `rule` and `pos_lookup` require

View File

@ -62,10 +62,12 @@ shortcut for this and instantiate the component using its string name and
> ```
| Name | Description |
| ------- | -------------------------------------------------------------------------------------------------------------------- |
| -------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `vocab` | The shared vocabulary. ~~Vocab~~ |
| `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. ~~Model[List[Doc], List[Floats2d]]~~ |
| `name` | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ |
| _keyword-only_ | |
| `scorer` | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"pos"` and `"morph"` and [`Scorer.score_token_attr_per_feat`](/api/scorer#score_token_attr_per_feat) for the attribute `"morph"`. ~~Optional[Callable]~~ |
## Morphologizer.\_\_call\_\_ {#call tag="method"}

View File

@ -298,8 +298,10 @@ Score a batch of examples.
> ```
| Name | Description |
| ----------- | ------------------------------------------------------------------------------------------------------- |
| -------------- | ------------------------------------------------------------------------------------------------------- |
| `examples` | The examples to score. ~~Iterable[Example]~~ |
| _keyword-only_ | |
| `\*\*kwargs` | Any additional settings to pass on to the scorer. ~~Any~~ |
| **RETURNS** | The scores, e.g. produced by the [`Scorer`](/api/scorer). ~~Dict[str, Union[float, Dict[str, float]]]~~ |
## TrainablePipe.create_optimizer {#create_optimizer tag="method"}

View File

@ -28,8 +28,12 @@ Create a new `Scorer`.
> ```
| Name | Description |
| ----- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `nlp` | The pipeline to use for scoring, where each pipeline component may provide a scoring method. If none is provided, then a default pipeline for the multi-language code `xx` is constructed containing: `senter`, `tagger`, `morphologizer`, `parser`, `ner`, `textcat`. ~~Language~~ |
| ------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `nlp` | The pipeline to use for scoring, where each pipeline component may provide a scoring method. If none is provided, then a default pipeline is constructed using the `default_lang` and `default_pipeline` settings. ~~Optional[Language]~~ |
| `default_lang` | The language to use for a default pipeline if `nlp` is not provided. Defaults to `xx`. ~~str~~ |
| `default_pipeline` | The pipeline components to use for a default pipeline if `nlp` is not provided. Defaults to `("senter", "tagger", "morphologizer", "parser", "ner", "textcat")`. ~~Iterable[str]~~ |
| _keyword-only_ | |
| `\*\*kwargs` | Any additional settings to pass on to the individual scoring methods. ~~Any~~ |
## Scorer.score {#score tag="method"}
@ -80,7 +84,7 @@ Docs with `has_unknown_spaces` are skipped during scoring.
> ```
| Name | Description |
| ----------- | ------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------ |
| ----------- | ------------------------------------------------------------------------------------------------------------------- |
| `examples` | The `Example` objects holding both the predictions and the correct gold-standard annotations. ~~Iterable[Example]~~ |
| **RETURNS** | A dictionary containing the scores `token_acc`, `token_p`, `token_r`, `token_f`. ~~Dict[str, float]~~ |
@ -253,3 +257,11 @@ entities that overlap between the gold reference and the predictions.
| _keyword-only_ | |
| `negative_labels` | The string values that refer to no annotation (e.g. "NIL"). ~~Iterable[str]~~ |
| **RETURNS** | A dictionary containing the scores. ~~Dict[str, Optional[float]]~~ |
## get_ner_prf {#get_ner_prf new="3"}
Compute micro-PRF and per-entity PRF scores.
| Name | Description |
| ---------- | ------------------------------------------------------------------------------------------------------------------- |
| `examples` | The `Example` objects holding both the predictions and the correct gold-standard annotations. ~~Iterable[Example]~~ |

View File

@ -61,10 +61,12 @@ shortcut for this and instantiate the component using its string name and
[`nlp.add_pipe`](/api/language#add_pipe).
| Name | Description |
| ------- | -------------------------------------------------------------------------------------------------------------------- |
| -------------- | ----------------------------------------------------------------------------------------------------------------------------------- |
| `vocab` | The shared vocabulary. ~~Vocab~~ |
| `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. ~~Model[List[Doc], List[Floats2d]]~~ |
| `name` | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ |
| _keyword-only_ | |
| `scorer` | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for the attribute `"sents"`. ~~Optional[Callable]~~ |
## SentenceRecognizer.\_\_call\_\_ {#call tag="method"}
@ -238,21 +240,6 @@ predicted scores.
| `scores` | Scores representing the model's predictions. |
| **RETURNS** | The loss and the gradient, i.e. `(loss, gradient)`. ~~Tuple[float, float]~~ |
## SentenceRecognizer.score {#score tag="method" new="3"}
Score a batch of examples.
> #### Example
>
> ```python
> scores = senter.score(examples)
> ```
| Name | Description |
| ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `examples` | The examples to score. ~~Iterable[Example]~~ |
| **RETURNS** | The scores, produced by [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"pos"`, `"tag"` and `"lemma"`. ~~Dict[str, float]~~ |
## SentenceRecognizer.create_optimizer {#create_optimizer tag="method"}
Create an optimizer for the pipeline component.

View File

@ -28,7 +28,7 @@ how the component should be configured. You can override its settings via the
> ```
| Setting | Description |
| ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------ | ------ |
| ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------ |
| `punct_chars` | Optional custom list of punctuation characters that mark sentence ends. See below for defaults if not set. Defaults to `None`. ~~Optional[List[str]]~~ |
```python
@ -51,9 +51,10 @@ Initialize the sentencizer.
> ```
| Name | Description |
| -------------- | ----------------------------------------------------------------------------------------------------------------------- |
| -------------- | ---------------------------------------------------------------------------------------------------------------------------------- |
| _keyword-only_ | |
| `punct_chars` | Optional custom list of punctuation characters that mark sentence ends. See below for defaults. ~~Optional[List[str]]~~ |
| `scorer` | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for the attribute `"sents"`. ~~Optional[Callable]~~ |
```python
### punct_chars defaults
@ -112,21 +113,6 @@ applied to the `Doc` in order.
| `batch_size` | The number of documents to buffer. Defaults to `128`. ~~int~~ |
| **YIELDS** | The processed documents in order. ~~Doc~~ |
## Sentencizer.score {#score tag="method" new="3"}
Score a batch of examples.
> #### Example
>
> ```python
> scores = sentencizer.score(examples)
> ```
| Name | Description |
| ----------- | --------------------------------------------------------------------------------------------------------------------- |
| `examples` | The examples to score. ~~Iterable[Example]~~ |
| **RETURNS** | The scores, produced by [`Scorer.score_spans`](/api/scorer#score_spans). ~~Dict[str, Union[float, Dict[str, float]]~~ |
## Sentencizer.to_disk {#to_disk tag="method"}
Save the sentencizer settings (punctuation characters) to a directory. Will

View File

@ -43,6 +43,7 @@ architectures and their arguments and hyperparameters.
| `spans_key` | Key of the [`Doc.spans`](/api/doc#spans) dict to save the spans under. During initialization and training, the component will look for spans on the reference document under the same key. Defaults to `"spans"`. ~~str~~ |
| `threshold` | Minimum probability to consider a prediction positive. Spans with a positive prediction will be saved on the Doc. Defaults to `0.5`. ~~float~~ |
| `max_positive` | Maximum number of labels to consider positive per span. Defaults to `None`, indicating no limit. ~~Optional[int]~~ |
| `scorer` | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for `Doc.spans[spans_key]` with overlapping spans allowed. ~~Optional[Callable]~~ |
```python
%%GITHUB_SPACY/spacy/pipeline/spancat.py
@ -241,22 +242,6 @@ predicted scores.
| `scores` | Scores representing the model's predictions. |
| **RETURNS** | The loss and the gradient, i.e. `(loss, gradient)`. ~~Tuple[float, float]~~ |
## SpanCategorizer.score {#score tag="method"}
Score a batch of examples.
> #### Example
>
> ```python
> scores = spancat.score(examples)
> ```
| Name | Description |
| -------------- | ---------------------------------------------------------------------------------------------------------------------- |
| `examples` | The examples to score. ~~Iterable[Example]~~ |
| _keyword-only_ | |
| **RETURNS** | The scores, produced by [`Scorer.score_spans`](/api/scorer#score_spans). ~~Dict[str, Union[float, Dict[str, float]]]~~ |
## SpanCategorizer.create_optimizer {#create_optimizer tag="method"}
Create an optimizer for the pipeline component.

View File

@ -55,10 +55,12 @@ shortcut for this and instantiate the component using its string name and
[`nlp.add_pipe`](/api/language#add_pipe).
| Name | Description |
| ------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| -------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `vocab` | The shared vocabulary. ~~Vocab~~ |
| `model` | A model instance that predicts the tag probabilities. The output vectors should match the number of tags in size, and be normalized as probabilities (all scores between 0 and 1, with the rows summing to `1`). ~~Model[List[Doc], List[Floats2d]]~~ |
| `name` | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ |
| _keyword-only_ | |
| `scorer` | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attribute `"tag"`. ~~Optional[Callable]~~ |
## Tagger.\_\_call\_\_ {#call tag="method"}
@ -249,21 +251,6 @@ predicted scores.
| `scores` | Scores representing the model's predictions. |
| **RETURNS** | The loss and the gradient, i.e. `(loss, gradient)`. ~~Tuple[float, float]~~ |
## Tagger.score {#score tag="method" new="3"}
Score a batch of examples.
> #### Example
>
> ```python
> scores = tagger.score(examples)
> ```
| Name | Description |
| ----------- | --------------------------------------------------------------------------------------------------------------------------------- |
| `examples` | The examples to score. ~~Iterable[Example]~~ |
| **RETURNS** | The scores, produced by [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attribute `"tag"`. ~~Dict[str, float]~~ |
## Tagger.create_optimizer {#create_optimizer tag="method"}
Create an optimizer for the pipeline component.

View File

@ -97,12 +97,13 @@ shortcut for this and instantiate the component using its string name and
[`nlp.add_pipe`](/api/language#add_pipe).
| Name | Description |
| -------------- | -------------------------------------------------------------------------------------------------------------------------- |
| -------------- | -------------------------------------------------------------------------------------------------------------------------------- |
| `vocab` | The shared vocabulary. ~~Vocab~~ |
| `model` | The Thinc [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. ~~Model[List[Doc], List[Floats2d]]~~ |
| `name` | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ |
| _keyword-only_ | |
| `threshold` | Cutoff to consider a prediction "positive", relevant when printing accuracy results. ~~float~~ |
| `scorer` | The scoring method. Defaults to [`Scorer.score_cats`](/api/scorer#score_cats) for the attribute `"cats"`. ~~Optional[Callable]~~ |
## TextCategorizer.\_\_call\_\_ {#call tag="method"}

View File

@ -373,6 +373,7 @@ factories.
| `optimizers` | Registry for functions that create [optimizers](https://thinc.ai/docs/api-optimizers). |
| `readers` | Registry for file and data readers, including training and evaluation data readers like [`Corpus`](/api/corpus). |
| `schedules` | Registry for functions that create [schedules](https://thinc.ai/docs/api-schedules). |
| `scorers` | Registry for functions that create scoring methods for use with the [`Scorer`](/api/scorer). Scoring methods are called with `Iterable[Example]` and arbitrary `\*\*kwargs` and return scores as `Dict[str, Any]`. |
| `tokenizers` | Registry for tokenizer factories. Registered functions should return a callback that receives the `nlp` object and returns a [`Tokenizer`](/api/tokenizer) or a custom callable. |
### spacy-transformers registry {#registry-transformers}