mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-01 00:17:44 +03:00 
			
		
		
		
	* Document scorers in registry and components from #8766 * Update spacy/pipeline/lemmatizer.py Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com> * Update website/docs/api/dependencyparser.md Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com> * Reformat Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
This commit is contained in:
		
							parent
							
								
									944ad6b1d4
								
							
						
					
					
						commit
						b278f31ee6
					
				|  | @ -36,9 +36,7 @@ def make_attribute_ruler( | ||||||
|     return AttributeRuler(nlp.vocab, name, validate=validate, scorer=scorer) |     return AttributeRuler(nlp.vocab, name, validate=validate, scorer=scorer) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def attribute_ruler_score( | def attribute_ruler_score(examples: Iterable[Example], **kwargs) -> Dict[str, Any]: | ||||||
|     examples: Iterable[Example], **kwargs |  | ||||||
| ) -> Dict[str, Any]: |  | ||||||
|     def morph_key_getter(token, attr): |     def morph_key_getter(token, attr): | ||||||
|         return getattr(token, attr).key |         return getattr(token, attr).key | ||||||
| 
 | 
 | ||||||
|  | @ -84,6 +82,10 @@ class AttributeRuler(Pipe): | ||||||
| 
 | 
 | ||||||
|         vocab (Vocab): The vocab. |         vocab (Vocab): The vocab. | ||||||
|         name (str): The pipe name. Defaults to "attribute_ruler". |         name (str): The pipe name. Defaults to "attribute_ruler". | ||||||
|  |         scorer (Optional[Callable]): The scoring method. Defaults to | ||||||
|  |             Scorer.score_token_attr for the attributes "tag", "pos", "morph" and | ||||||
|  |             "lemma" and Scorer.score_token_attr_per_feat for the attribute | ||||||
|  |             "morph". | ||||||
| 
 | 
 | ||||||
|         RETURNS (AttributeRuler): The AttributeRuler component. |         RETURNS (AttributeRuler): The AttributeRuler component. | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -102,6 +102,7 @@ def make_parser( | ||||||
|         primarily affects the label accuracy, it can also affect the attachment |         primarily affects the label accuracy, it can also affect the attachment | ||||||
|         structure, as the labels are used to represent the pseudo-projectivity |         structure, as the labels are used to represent the pseudo-projectivity | ||||||
|         transformation. |         transformation. | ||||||
|  |     scorer (Optional[Callable]): The scoring method. | ||||||
|     """ |     """ | ||||||
|     return DependencyParser( |     return DependencyParser( | ||||||
|         nlp.vocab, |         nlp.vocab, | ||||||
|  |  | ||||||
|  | @ -83,6 +83,7 @@ def make_entity_linker( | ||||||
|     entity_vector_length (int): Size of encoding vectors in the KB. |     entity_vector_length (int): Size of encoding vectors in the KB. | ||||||
|     get_candidates (Callable[[KnowledgeBase, "Span"], Iterable[Candidate]]): Function that |     get_candidates (Callable[[KnowledgeBase, "Span"], Iterable[Candidate]]): Function that | ||||||
|         produces a list of candidates, given a certain knowledge base and a textual mention. |         produces a list of candidates, given a certain knowledge base and a textual mention. | ||||||
|  |     scorer (Optional[Callable]): The scoring method. | ||||||
|     """ |     """ | ||||||
|     return EntityLinker( |     return EntityLinker( | ||||||
|         nlp.vocab, |         nlp.vocab, | ||||||
|  | @ -142,6 +143,8 @@ class EntityLinker(TrainablePipe): | ||||||
|         entity_vector_length (int): Size of encoding vectors in the KB. |         entity_vector_length (int): Size of encoding vectors in the KB. | ||||||
|         get_candidates (Callable[[KnowledgeBase, Span], Iterable[Candidate]]): Function that |         get_candidates (Callable[[KnowledgeBase, Span], Iterable[Candidate]]): Function that | ||||||
|             produces a list of candidates, given a certain knowledge base and a textual mention. |             produces a list of candidates, given a certain knowledge base and a textual mention. | ||||||
|  |         scorer (Optional[Callable]): The scoring method. Defaults to | ||||||
|  |             Scorer.score_links. | ||||||
| 
 | 
 | ||||||
|         DOCS: https://spacy.io/api/entitylinker#init |         DOCS: https://spacy.io/api/entitylinker#init | ||||||
|         """ |         """ | ||||||
|  |  | ||||||
|  | @ -106,6 +106,8 @@ class EntityRuler(Pipe): | ||||||
|         overwrite_ents (bool): If existing entities are present, e.g. entities |         overwrite_ents (bool): If existing entities are present, e.g. entities | ||||||
|             added by the model, overwrite them by matches if necessary. |             added by the model, overwrite them by matches if necessary. | ||||||
|         ent_id_sep (str): Separator used internally for entity IDs. |         ent_id_sep (str): Separator used internally for entity IDs. | ||||||
|  |         scorer (Optional[Callable]): The scoring method. Defaults to | ||||||
|  |             spacy.scorer.get_ner_prf. | ||||||
| 
 | 
 | ||||||
|         DOCS: https://spacy.io/api/entityruler#init |         DOCS: https://spacy.io/api/entityruler#init | ||||||
|         """ |         """ | ||||||
|  |  | ||||||
|  | @ -90,6 +90,8 @@ class Lemmatizer(Pipe): | ||||||
|         mode (str): The lemmatizer mode: "lookup", "rule". Defaults to "lookup". |         mode (str): The lemmatizer mode: "lookup", "rule". Defaults to "lookup". | ||||||
|         overwrite (bool): Whether to overwrite existing lemmas. Defaults to |         overwrite (bool): Whether to overwrite existing lemmas. Defaults to | ||||||
|             `False`. |             `False`. | ||||||
|  |         scorer (Optional[Callable]): The scoring method. Defaults to | ||||||
|  |             Scorer.score_token_attr for the attribute "lemma". | ||||||
| 
 | 
 | ||||||
|         DOCS: https://spacy.io/api/lemmatizer#init |         DOCS: https://spacy.io/api/lemmatizer#init | ||||||
|         """ |         """ | ||||||
|  |  | ||||||
|  | @ -95,6 +95,9 @@ class Morphologizer(Tagger): | ||||||
|         model (thinc.api.Model): The Thinc Model powering the pipeline component. |         model (thinc.api.Model): The Thinc Model powering the pipeline component. | ||||||
|         name (str): The component instance name, used to add entries to the |         name (str): The component instance name, used to add entries to the | ||||||
|             losses during training. |             losses during training. | ||||||
|  |         scorer (Optional[Callable]): The scoring method. Defaults to | ||||||
|  |             Scorer.score_token_attr for the attributes "pos" and "morph" and | ||||||
|  |             Scorer.score_token_attr_per_feat for the attribute "morph". | ||||||
| 
 | 
 | ||||||
|         DOCS: https://spacy.io/api/morphologizer#init |         DOCS: https://spacy.io/api/morphologizer#init | ||||||
|         """ |         """ | ||||||
|  |  | ||||||
|  | @ -82,6 +82,7 @@ def make_ner( | ||||||
|     incorrect_spans_key (Optional[str]): Identifies spans that are known |     incorrect_spans_key (Optional[str]): Identifies spans that are known | ||||||
|         to be incorrect entity annotations. The incorrect entity annotations |         to be incorrect entity annotations. The incorrect entity annotations | ||||||
|         can be stored in the span group, under this key. |         can be stored in the span group, under this key. | ||||||
|  |     scorer (Optional[Callable]): The scoring method. | ||||||
|     """ |     """ | ||||||
|     return EntityRecognizer( |     return EntityRecognizer( | ||||||
|         nlp.vocab, |         nlp.vocab, | ||||||
|  | @ -158,6 +159,7 @@ def make_beam_ner( | ||||||
|         and are faster to compute. |         and are faster to compute. | ||||||
|     incorrect_spans_key (Optional[str]): Optional key into span groups of |     incorrect_spans_key (Optional[str]): Optional key into span groups of | ||||||
|         entities known to be non-entities. |         entities known to be non-entities. | ||||||
|  |     scorer (Optional[Callable]): The scoring method. | ||||||
|     """ |     """ | ||||||
|     return EntityRecognizer( |     return EntityRecognizer( | ||||||
|         nlp.vocab, |         nlp.vocab, | ||||||
|  |  | ||||||
|  | @ -55,7 +55,8 @@ class Sentencizer(Pipe): | ||||||
| 
 | 
 | ||||||
|         punct_chars (list): Punctuation characters to split on. Will be |         punct_chars (list): Punctuation characters to split on. Will be | ||||||
|             serialized with the nlp object. |             serialized with the nlp object. | ||||||
|         RETURNS (Sentencizer): The sentencizer component. |         scorer (Optional[Callable]): The scoring method. Defaults to | ||||||
|  |             Scorer.score_spans for the attribute "sents". | ||||||
| 
 | 
 | ||||||
|         DOCS: https://spacy.io/api/sentencizer#init |         DOCS: https://spacy.io/api/sentencizer#init | ||||||
|         """ |         """ | ||||||
|  |  | ||||||
|  | @ -69,6 +69,8 @@ class SentenceRecognizer(Tagger): | ||||||
|         model (thinc.api.Model): The Thinc Model powering the pipeline component. |         model (thinc.api.Model): The Thinc Model powering the pipeline component. | ||||||
|         name (str): The component instance name, used to add entries to the |         name (str): The component instance name, used to add entries to the | ||||||
|             losses during training. |             losses during training. | ||||||
|  |         scorer (Optional[Callable]): The scoring method. Defaults to | ||||||
|  |             Scorer.score_spans for the attribute "sents". | ||||||
| 
 | 
 | ||||||
|         DOCS: https://spacy.io/api/sentencerecognizer#init |         DOCS: https://spacy.io/api/sentencerecognizer#init | ||||||
|         """ |         """ | ||||||
|  |  | ||||||
|  | @ -181,6 +181,22 @@ class SpanCategorizer(TrainablePipe): | ||||||
|         scorer: Optional[Callable] = spancat_score, |         scorer: Optional[Callable] = spancat_score, | ||||||
|     ) -> None: |     ) -> None: | ||||||
|         """Initialize the span categorizer. |         """Initialize the span categorizer. | ||||||
|  |         vocab (Vocab): The shared vocabulary. | ||||||
|  |         model (thinc.api.Model): The Thinc Model powering the pipeline component. | ||||||
|  |         name (str): The component instance name, used to add entries to the | ||||||
|  |             losses during training. | ||||||
|  |         spans_key (str): Key of the Doc.spans dict to save the spans under. | ||||||
|  |             During initialization and training, the component will look for | ||||||
|  |             spans on the reference document under the same key. Defaults to | ||||||
|  |             `"spans"`. | ||||||
|  |         threshold (float): Minimum probability to consider a prediction | ||||||
|  |             positive. Spans with a positive prediction will be saved on the Doc. | ||||||
|  |             Defaults to 0.5. | ||||||
|  |         max_positive (Optional[int]): Maximum number of labels to consider | ||||||
|  |             positive per span. Defaults to None, indicating no limit. | ||||||
|  |         scorer (Optional[Callable]): The scoring method. Defaults to | ||||||
|  |             Scorer.score_spans for the Doc.spans[spans_key] with overlapping | ||||||
|  |             spans allowed. | ||||||
| 
 | 
 | ||||||
|         DOCS: https://spacy.io/api/spancategorizer#init |         DOCS: https://spacy.io/api/spancategorizer#init | ||||||
|         """ |         """ | ||||||
|  |  | ||||||
|  | @ -78,6 +78,8 @@ class Tagger(TrainablePipe): | ||||||
|         model (thinc.api.Model): The Thinc Model powering the pipeline component. |         model (thinc.api.Model): The Thinc Model powering the pipeline component. | ||||||
|         name (str): The component instance name, used to add entries to the |         name (str): The component instance name, used to add entries to the | ||||||
|             losses during training. |             losses during training. | ||||||
|  |         scorer (Optional[Callable]): The scoring method. Defaults to | ||||||
|  |             Scorer.score_token_attr for the attribute "tag". | ||||||
| 
 | 
 | ||||||
|         DOCS: https://spacy.io/api/tagger#init |         DOCS: https://spacy.io/api/tagger#init | ||||||
|         """ |         """ | ||||||
|  |  | ||||||
|  | @ -104,6 +104,7 @@ def make_textcat( | ||||||
|     model (Model[List[Doc], List[Floats2d]]): A model instance that predicts |     model (Model[List[Doc], List[Floats2d]]): A model instance that predicts | ||||||
|         scores for each category. |         scores for each category. | ||||||
|     threshold (float): Cutoff to consider a prediction "positive". |     threshold (float): Cutoff to consider a prediction "positive". | ||||||
|  |     scorer (Optional[Callable]): The scoring method. | ||||||
|     """ |     """ | ||||||
|     return TextCategorizer(nlp.vocab, model, name, threshold=threshold, scorer=scorer) |     return TextCategorizer(nlp.vocab, model, name, threshold=threshold, scorer=scorer) | ||||||
| 
 | 
 | ||||||
|  | @ -144,6 +145,8 @@ class TextCategorizer(TrainablePipe): | ||||||
|         name (str): The component instance name, used to add entries to the |         name (str): The component instance name, used to add entries to the | ||||||
|             losses during training. |             losses during training. | ||||||
|         threshold (float): Cutoff to consider a prediction "positive". |         threshold (float): Cutoff to consider a prediction "positive". | ||||||
|  |         scorer (Optional[Callable]): The scoring method. Defaults to | ||||||
|  |             Scorer.score_cats for the attribute "cats". | ||||||
| 
 | 
 | ||||||
|         DOCS: https://spacy.io/api/textcategorizer#init |         DOCS: https://spacy.io/api/textcategorizer#init | ||||||
|         """ |         """ | ||||||
|  |  | ||||||
|  | @ -87,6 +87,7 @@ cdef class Parser(TrainablePipe): | ||||||
|         incorrect_spans_key (Optional[str]): Identifies spans that are known |         incorrect_spans_key (Optional[str]): Identifies spans that are known | ||||||
|             to be incorrect entity annotations. The incorrect entity annotations |             to be incorrect entity annotations. The incorrect entity annotations | ||||||
|             can be stored in the span group, under this key. |             can be stored in the span group, under this key. | ||||||
|  |         scorer (Optional[Callable]): The scoring method. Defaults to None. | ||||||
|         """ |         """ | ||||||
|         self.vocab = vocab |         self.vocab = vocab | ||||||
|         self.name = name |         self.name = name | ||||||
|  |  | ||||||
|  | @ -49,11 +49,12 @@ Initialize the attribute ruler. | ||||||
| > ``` | > ``` | ||||||
| 
 | 
 | ||||||
| | Name           | Description                                                                                                                                                                                                                                                                                | | | Name           | Description                                                                                                                                                                                                                                                                                | | ||||||
| | -------------- | ---------------------------------------------------------------------------------------------------------------------------------------- | | | -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | ||||||
| | `vocab`        | The shared vocabulary to pass to the matcher. ~~Vocab~~                                                                                                                                                                                                                                    | | | `vocab`        | The shared vocabulary to pass to the matcher. ~~Vocab~~                                                                                                                                                                                                                                    | | ||||||
| | `name`         | Instance name of the current pipeline component. Typically passed in automatically from the factory when the component is added. ~~str~~                                                                                                                                                   | | | `name`         | Instance name of the current pipeline component. Typically passed in automatically from the factory when the component is added. ~~str~~                                                                                                                                                   | | ||||||
| | _keyword-only_ |                                                                                                                                                                                                                                                                                            | | | _keyword-only_ |                                                                                                                                                                                                                                                                                            | | ||||||
| | `validate`     | Whether patterns should be validated (passed to the [`Matcher`](/api/matcher#init)). Defaults to `False`. ~~bool~~                                                                                                                                                                         | | | `validate`     | Whether patterns should be validated (passed to the [`Matcher`](/api/matcher#init)). Defaults to `False`. ~~bool~~                                                                                                                                                                         | | ||||||
|  | | `scorer`       | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"tag"`, `"pos"`, `"morph"` and `"lemma"` and [`Scorer.score_token_attr_per_feat`](/api/scorer#score_token_attr_per_feat) for the attribute `"morph"`. ~~Optional[Callable]~~ | | ||||||
| 
 | 
 | ||||||
| ## AttributeRuler.\_\_call\_\_ {#call tag="method"} | ## AttributeRuler.\_\_call\_\_ {#call tag="method"} | ||||||
| 
 | 
 | ||||||
|  | @ -175,21 +176,6 @@ Load attribute ruler patterns from morph rules. | ||||||
| | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ||||||
| | `morph_rules` | The morph rules that map token text and fine-grained tags to coarse-grained tags, lemmas and morphological features. ~~Dict[str, Dict[str, Dict[Union[int, str], Union[int, str]]]]~~ | | | `morph_rules` | The morph rules that map token text and fine-grained tags to coarse-grained tags, lemmas and morphological features. ~~Dict[str, Dict[str, Dict[Union[int, str], Union[int, str]]]]~~ | | ||||||
| 
 | 
 | ||||||
| ## AttributeRuler.score {#score tag="method" new="3"} |  | ||||||
| 
 |  | ||||||
| Score a batch of examples. |  | ||||||
| 
 |  | ||||||
| > #### Example |  | ||||||
| > |  | ||||||
| > ```python |  | ||||||
| > scores = ruler.score(examples) |  | ||||||
| > ``` |  | ||||||
| 
 |  | ||||||
| | Name        | Description                                                                                                                                                                                                           | |  | ||||||
| | ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | |  | ||||||
| | `examples`  | The examples to score. ~~Iterable[Example]~~                                                                                                                                                                          | |  | ||||||
| | **RETURNS** | The scores, produced by [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"tag"`, `"pos"`, `"morph"` and `"lemma"` if present in any of the target token attributes. ~~Dict[str, float]~~ | |  | ||||||
| 
 |  | ||||||
| ## AttributeRuler.to_disk {#to_disk tag="method"} | ## AttributeRuler.to_disk {#to_disk tag="method"} | ||||||
| 
 | 
 | ||||||
| Serialize the pipe to disk. | Serialize the pipe to disk. | ||||||
|  |  | ||||||
|  | @ -91,6 +91,7 @@ shortcut for this and instantiate the component using its string name and | ||||||
| | `update_with_oracle_cut_size` | During training, cut long sequences into shorter segments by creating intermediate states based on the gold-standard history. The model is not very sensitive to this parameter, so you usually won't need to change it. Defaults to `100`. ~~int~~                                                 | | | `update_with_oracle_cut_size` | During training, cut long sequences into shorter segments by creating intermediate states based on the gold-standard history. The model is not very sensitive to this parameter, so you usually won't need to change it. Defaults to `100`. ~~int~~                                                 | | ||||||
| | `learn_tokens`                | Whether to learn to merge subtokens that are split relative to the gold standard. Experimental. Defaults to `False`. ~~bool~~                                                                                                                                                                       | | | `learn_tokens`                | Whether to learn to merge subtokens that are split relative to the gold standard. Experimental. Defaults to `False`. ~~bool~~                                                                                                                                                                       | | ||||||
| | `min_action_freq`             | The minimum frequency of labelled actions to retain. Rarer labelled actions have their label backed-off to "dep". While this primarily affects the label accuracy, it can also affect the attachment structure, as the labels are used to represent the pseudo-projectivity transformation. ~~int~~ | | | `min_action_freq`             | The minimum frequency of labelled actions to retain. Rarer labelled actions have their label backed-off to "dep". While this primarily affects the label accuracy, it can also affect the attachment structure, as the labels are used to represent the pseudo-projectivity transformation. ~~int~~ | | ||||||
|  | | `scorer`                      | The scoring method. Defaults to [`Scorer.score_deps`](/api/scorer#score_deps) for the attribute `"dep"` ignoring the labels `p` and `punct` and [`Scorer.score_spans`](/api/scorer#score_spans) for the attribute `"sents"`. ~~Optional[Callable]~~                                                 | | ||||||
| 
 | 
 | ||||||
| ## DependencyParser.\_\_call\_\_ {#call tag="method"} | ## DependencyParser.\_\_call\_\_ {#call tag="method"} | ||||||
| 
 | 
 | ||||||
|  | @ -259,21 +260,6 @@ predicted scores. | ||||||
| | `scores`    | Scores representing the model's predictions. ~~StateClass~~                 | | | `scores`    | Scores representing the model's predictions. ~~StateClass~~                 | | ||||||
| | **RETURNS** | The loss and the gradient, i.e. `(loss, gradient)`. ~~Tuple[float, float]~~ | | | **RETURNS** | The loss and the gradient, i.e. `(loss, gradient)`. ~~Tuple[float, float]~~ | | ||||||
| 
 | 
 | ||||||
| ## DependencyParser.score {#score tag="method" new="3"} |  | ||||||
| 
 |  | ||||||
| Score a batch of examples. |  | ||||||
| 
 |  | ||||||
| > #### Example |  | ||||||
| > |  | ||||||
| > ```python |  | ||||||
| > scores = parser.score(examples) |  | ||||||
| > ``` |  | ||||||
| 
 |  | ||||||
| | Name        | Description                                                                                                                                                              | |  | ||||||
| | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | |  | ||||||
| | `examples`  | The examples to score. ~~Iterable[Example]~~                                                                                                                             | |  | ||||||
| | **RETURNS** | The scores, produced by [`Scorer.score_spans`](/api/scorer#score_spans) and [`Scorer.score_deps`](/api/scorer#score_deps). ~~Dict[str, Union[float, Dict[str, float]]]~~ | |  | ||||||
| 
 |  | ||||||
| ## DependencyParser.create_optimizer {#create_optimizer tag="method"} | ## DependencyParser.create_optimizer {#create_optimizer tag="method"} | ||||||
| 
 | 
 | ||||||
| Create an [`Optimizer`](https://thinc.ai/docs/api-optimizers) for the pipeline | Create an [`Optimizer`](https://thinc.ai/docs/api-optimizers) for the pipeline | ||||||
|  |  | ||||||
|  | @ -50,6 +50,7 @@ architectures and their arguments and hyperparameters. | ||||||
| | `model`                | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. Defaults to [EntityLinker](/api/architectures#EntityLinker). ~~Model~~                                                                                                                   | | | `model`                | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. Defaults to [EntityLinker](/api/architectures#EntityLinker). ~~Model~~                                                                                                                   | | ||||||
| | `entity_vector_length` | Size of encoding vectors in the KB. Defaults to `64`. ~~int~~                                                                                                                                                                                                            | | | `entity_vector_length` | Size of encoding vectors in the KB. Defaults to `64`. ~~int~~                                                                                                                                                                                                            | | ||||||
| | `get_candidates`       | Function that generates plausible candidates for a given `Span` object. Defaults to [CandidateGenerator](/api/architectures#CandidateGenerator), a function looking up exact, case-dependent aliases in the KB. ~~Callable[[KnowledgeBase, Span], Iterable[Candidate]]~~ | | | `get_candidates`       | Function that generates plausible candidates for a given `Span` object. Defaults to [CandidateGenerator](/api/architectures#CandidateGenerator), a function looking up exact, case-dependent aliases in the KB. ~~Callable[[KnowledgeBase, Span], Iterable[Candidate]]~~ | | ||||||
|  | | `scorer`               | The scoring method. Defaults to [`Scorer.score_links`](/api/scorer#score_links). ~~Optional[Callable]~~                                                                                                                                                                  | | ||||||
| 
 | 
 | ||||||
| ```python | ```python | ||||||
| %%GITHUB_SPACY/spacy/pipeline/entity_linker.py | %%GITHUB_SPACY/spacy/pipeline/entity_linker.py | ||||||
|  | @ -259,21 +260,6 @@ pipe's entity linking model and context encoder. Delegates to | ||||||
| | `losses`       | Optional record of the loss during training. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ | | | `losses`       | Optional record of the loss during training. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ | | ||||||
| | **RETURNS**    | The updated `losses` dictionary. ~~Dict[str, float]~~                                                                    | | | **RETURNS**    | The updated `losses` dictionary. ~~Dict[str, float]~~                                                                    | | ||||||
| 
 | 
 | ||||||
| ## EntityLinker.score {#score tag="method" new="3"} |  | ||||||
| 
 |  | ||||||
| Score a batch of examples. |  | ||||||
| 
 |  | ||||||
| > #### Example |  | ||||||
| > |  | ||||||
| > ```python |  | ||||||
| > scores = entity_linker.score(examples) |  | ||||||
| > ``` |  | ||||||
| 
 |  | ||||||
| | Name        | Description                                                                                    | |  | ||||||
| | ----------- | ---------------------------------------------------------------------------------------------- | |  | ||||||
| | `examples`  | The examples to score. ~~Iterable[Example]~~                                                   | |  | ||||||
| | **RETURNS** | The scores, produced by [`Scorer.score_links`](/api/scorer#score_links) . ~~Dict[str, float]~~ | |  | ||||||
| 
 |  | ||||||
| ## EntityLinker.create_optimizer {#create_optimizer tag="method"} | ## EntityLinker.create_optimizer {#create_optimizer tag="method"} | ||||||
| 
 | 
 | ||||||
| Create an optimizer for the pipeline component. | Create an optimizer for the pipeline component. | ||||||
|  |  | ||||||
|  | @ -48,6 +48,7 @@ architectures and their arguments and hyperparameters. | ||||||
| | `update_with_oracle_cut_size` | During training, cut long sequences into shorter segments by creating intermediate states based on the gold-standard history. The model is not very sensitive to this parameter, so you usually won't need to change it. Defaults to `100`. ~~int~~ | | | `update_with_oracle_cut_size` | During training, cut long sequences into shorter segments by creating intermediate states based on the gold-standard history. The model is not very sensitive to this parameter, so you usually won't need to change it. Defaults to `100`. ~~int~~ | | ||||||
| | `model`                       | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. Defaults to [TransitionBasedParser](/api/architectures#TransitionBasedParser). ~~Model[List[Doc], List[Floats2d]]~~                                                 | | | `model`                       | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. Defaults to [TransitionBasedParser](/api/architectures#TransitionBasedParser). ~~Model[List[Doc], List[Floats2d]]~~                                                 | | ||||||
| | `incorrect_spans_key`         | This key refers to a `SpanGroup` in `doc.spans` that specifies incorrect spans. The NER will learn not to predict (exactly) those spans. Defaults to `None`. ~~Optional[str]~~                                                                      | | | `incorrect_spans_key`         | This key refers to a `SpanGroup` in `doc.spans` that specifies incorrect spans. The NER will learn not to predict (exactly) those spans. Defaults to `None`. ~~Optional[str]~~                                                                      | | ||||||
|  | | `scorer`                      | The scoring method. Defaults to [`spacy.scorer.get_ner_prf`](/api/scorer#get_ner_prf). ~~Optional[Callable]~~                                                                                                                                       | | ||||||
| 
 | 
 | ||||||
| ```python | ```python | ||||||
| %%GITHUB_SPACY/spacy/pipeline/ner.pyx | %%GITHUB_SPACY/spacy/pipeline/ner.pyx | ||||||
|  | @ -251,21 +252,6 @@ predicted scores. | ||||||
| | `scores`    | Scores representing the model's predictions. ~~StateClass~~                 | | | `scores`    | Scores representing the model's predictions. ~~StateClass~~                 | | ||||||
| | **RETURNS** | The loss and the gradient, i.e. `(loss, gradient)`. ~~Tuple[float, float]~~ | | | **RETURNS** | The loss and the gradient, i.e. `(loss, gradient)`. ~~Tuple[float, float]~~ | | ||||||
| 
 | 
 | ||||||
| ## EntityRecognizer.score {#score tag="method" new="3"} |  | ||||||
| 
 |  | ||||||
| Score a batch of examples. |  | ||||||
| 
 |  | ||||||
| > #### Example |  | ||||||
| > |  | ||||||
| > ```python |  | ||||||
| > scores = ner.score(examples) |  | ||||||
| > ``` |  | ||||||
| 
 |  | ||||||
| | Name        | Description                                               | |  | ||||||
| | ----------- | --------------------------------------------------------- | |  | ||||||
| | `examples`  | The examples to score. ~~Iterable[Example]~~              | |  | ||||||
| | **RETURNS** | The scores. ~~Dict[str, Union[float, Dict[str, float]]]~~ | |  | ||||||
| 
 |  | ||||||
| ## EntityRecognizer.create_optimizer {#create_optimizer tag="method"} | ## EntityRecognizer.create_optimizer {#create_optimizer tag="method"} | ||||||
| 
 | 
 | ||||||
| Create an optimizer for the pipeline component. | Create an optimizer for the pipeline component. | ||||||
|  |  | ||||||
|  | @ -40,6 +40,7 @@ how the component should be configured. You can override its settings via the | ||||||
| | `validate`            | Whether patterns should be validated (passed to the `Matcher` and `PhraseMatcher`). Defaults to `False`. ~~bool~~                                                                             | | | `validate`            | Whether patterns should be validated (passed to the `Matcher` and `PhraseMatcher`). Defaults to `False`. ~~bool~~                                                                             | | ||||||
| | `overwrite_ents`      | If existing entities are present, e.g. entities added by the model, overwrite them by matches if necessary. Defaults to `False`. ~~bool~~                                                     | | | `overwrite_ents`      | If existing entities are present, e.g. entities added by the model, overwrite them by matches if necessary. Defaults to `False`. ~~bool~~                                                     | | ||||||
| | `ent_id_sep`          | Separator used internally for entity IDs. Defaults to `"\|\|"`. ~~str~~                                                                                                                       | | | `ent_id_sep`          | Separator used internally for entity IDs. Defaults to `"\|\|"`. ~~str~~                                                                                                                       | | ||||||
|  | | `scorer`              | The scoring method. Defaults to [`spacy.scorer.get_ner_prf`](/api/scorer#get_ner_prf). ~~Optional[Callable]~~                                                                                 | | ||||||
| 
 | 
 | ||||||
| ```python | ```python | ||||||
| %%GITHUB_SPACY/spacy/pipeline/entityruler.py | %%GITHUB_SPACY/spacy/pipeline/entityruler.py | ||||||
|  |  | ||||||
|  | @ -48,10 +48,12 @@ data format used by the lookup and rule-based lemmatizers, see | ||||||
| > ``` | > ``` | ||||||
| 
 | 
 | ||||||
| | Setting        | Description                                                                                                                                               | | | Setting        | Description                                                                                                                                               | | ||||||
| | ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | | | -------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | | ||||||
| | `mode`         | The lemmatizer mode, e.g. `"lookup"` or `"rule"`. Defaults to `lookup` if no language-specific lemmatizer is available (see the following table). ~~str~~ | | | `mode`         | The lemmatizer mode, e.g. `"lookup"` or `"rule"`. Defaults to `lookup` if no language-specific lemmatizer is available (see the following table). ~~str~~ | | ||||||
| | `overwrite`    | Whether to overwrite existing lemmas. Defaults to `False`. ~~bool~~                                                                                       | | | `overwrite`    | Whether to overwrite existing lemmas. Defaults to `False`. ~~bool~~                                                                                       | | ||||||
| | `model`        | **Not yet implemented:** the model to use. ~~Model~~                                                                                                      | | | `model`        | **Not yet implemented:** the model to use. ~~Model~~                                                                                                      | | ||||||
|  | | _keyword-only_ |                                                                                                                                                           | | ||||||
|  | | `scorer`       | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attribute `"lemma"`. ~~Optional[Callable]~~             | | ||||||
| 
 | 
 | ||||||
| Many languages specify a default lemmatizer mode other than `lookup` if a better | Many languages specify a default lemmatizer mode other than `lookup` if a better | ||||||
| lemmatizer is available. The lemmatizer modes `rule` and `pos_lookup` require | lemmatizer is available. The lemmatizer modes `rule` and `pos_lookup` require | ||||||
|  |  | ||||||
|  | @ -62,10 +62,12 @@ shortcut for this and instantiate the component using its string name and | ||||||
| > ``` | > ``` | ||||||
| 
 | 
 | ||||||
| | Name           | Description                                                                                                                                                                                                                                                            | | | Name           | Description                                                                                                                                                                                                                                                            | | ||||||
| | ------- | -------------------------------------------------------------------------------------------------------------------- | | | -------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ||||||
| | `vocab`        | The shared vocabulary. ~~Vocab~~                                                                                                                                                                                                                                       | | | `vocab`        | The shared vocabulary. ~~Vocab~~                                                                                                                                                                                                                                       | | ||||||
| | `model`        | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. ~~Model[List[Doc], List[Floats2d]]~~                                                                                                                                                   | | | `model`        | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. ~~Model[List[Doc], List[Floats2d]]~~                                                                                                                                                   | | ||||||
| | `name`         | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~                                                                                                                                                                    | | | `name`         | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~                                                                                                                                                                    | | ||||||
|  | | _keyword-only_ |                                                                                                                                                                                                                                                                        | | ||||||
|  | | `scorer`       | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"pos"` and `"morph"` and [`Scorer.score_token_attr_per_feat`](/api/scorer#score_token_attr_per_feat) for the attribute `"morph"`. ~~Optional[Callable]~~ | | ||||||
| 
 | 
 | ||||||
| ## Morphologizer.\_\_call\_\_ {#call tag="method"} | ## Morphologizer.\_\_call\_\_ {#call tag="method"} | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -298,8 +298,10 @@ Score a batch of examples. | ||||||
| > ``` | > ``` | ||||||
| 
 | 
 | ||||||
| | Name           | Description                                                                                             | | | Name           | Description                                                                                             | | ||||||
| | ----------- | ------------------------------------------------------------------------------------------------------- | | | -------------- | ------------------------------------------------------------------------------------------------------- | | ||||||
| | `examples`     | The examples to score. ~~Iterable[Example]~~                                                            | | | `examples`     | The examples to score. ~~Iterable[Example]~~                                                            | | ||||||
|  | | _keyword-only_ |                                                                                                         | | ||||||
|  | | `\*\*kwargs`   | Any additional settings to pass on to the scorer. ~~Any~~                                               | | ||||||
| | **RETURNS**    | The scores, e.g. produced by the [`Scorer`](/api/scorer). ~~Dict[str, Union[float, Dict[str, float]]]~~ | | | **RETURNS**    | The scores, e.g. produced by the [`Scorer`](/api/scorer). ~~Dict[str, Union[float, Dict[str, float]]]~~ | | ||||||
| 
 | 
 | ||||||
| ## TrainablePipe.create_optimizer {#create_optimizer tag="method"} | ## TrainablePipe.create_optimizer {#create_optimizer tag="method"} | ||||||
|  |  | ||||||
|  | @ -28,8 +28,12 @@ Create a new `Scorer`. | ||||||
| > ``` | > ``` | ||||||
| 
 | 
 | ||||||
| | Name               | Description                                                                                                                                                                                                                               | | | Name               | Description                                                                                                                                                                                                                               | | ||||||
| | ----- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | | ------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ||||||
| | `nlp` | The pipeline to use for scoring, where each pipeline component may provide a scoring method. If none is provided, then a default pipeline for the multi-language code `xx` is constructed containing: `senter`, `tagger`, `morphologizer`, `parser`, `ner`, `textcat`. ~~Language~~ | | | `nlp`              | The pipeline to use for scoring, where each pipeline component may provide a scoring method. If none is provided, then a default pipeline is constructed using the `default_lang` and `default_pipeline` settings. ~~Optional[Language]~~ | | ||||||
|  | | `default_lang`     | The language to use for a default pipeline if `nlp` is not provided. Defaults to `xx`. ~~str~~                                                                                                                                            | | ||||||
|  | | `default_pipeline` | The pipeline components to use for a default pipeline if `nlp` is not provided. Defaults to `("senter", "tagger", "morphologizer", "parser", "ner", "textcat")`. ~~Iterable[str]~~                                                        | | ||||||
|  | | _keyword-only_     |                                                                                                                                                                                                                                           | | ||||||
|  | | `\*\*kwargs`       | Any additional settings to pass on to the individual scoring methods. ~~Any~~                                                                                                                                                             | | ||||||
| 
 | 
 | ||||||
| ## Scorer.score {#score tag="method"} | ## Scorer.score {#score tag="method"} | ||||||
| 
 | 
 | ||||||
|  | @ -80,7 +84,7 @@ Docs with `has_unknown_spaces` are skipped during scoring. | ||||||
| > ``` | > ``` | ||||||
| 
 | 
 | ||||||
| | Name        | Description                                                                                                         | | | Name        | Description                                                                                                         | | ||||||
| | ----------- | ------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------ | | | ----------- | ------------------------------------------------------------------------------------------------------------------- | | ||||||
| | `examples`  | The `Example` objects holding both the predictions and the correct gold-standard annotations. ~~Iterable[Example]~~ | | | `examples`  | The `Example` objects holding both the predictions and the correct gold-standard annotations. ~~Iterable[Example]~~ | | ||||||
| | **RETURNS** | A dictionary containing the scores `token_acc`, `token_p`, `token_r`, `token_f`. ~~Dict[str, float]~~               | | | **RETURNS** | A dictionary containing the scores `token_acc`, `token_p`, `token_r`, `token_f`. ~~Dict[str, float]~~               | | ||||||
| 
 | 
 | ||||||
|  | @ -253,3 +257,11 @@ entities that overlap between the gold reference and the predictions. | ||||||
| | _keyword-only_    |                                                                                                                     | | | _keyword-only_    |                                                                                                                     | | ||||||
| | `negative_labels` | The string values that refer to no annotation (e.g. "NIL"). ~~Iterable[str]~~                                       | | | `negative_labels` | The string values that refer to no annotation (e.g. "NIL"). ~~Iterable[str]~~                                       | | ||||||
| | **RETURNS**       | A dictionary containing the scores. ~~Dict[str, Optional[float]]~~                                                  | | | **RETURNS**       | A dictionary containing the scores. ~~Dict[str, Optional[float]]~~                                                  | | ||||||
|  | 
 | ||||||
|  | ## get_ner_prf {#get_ner_prf new="3"} | ||||||
|  | 
 | ||||||
|  | Compute micro-PRF and per-entity PRF scores. | ||||||
|  | 
 | ||||||
|  | | Name       | Description                                                                                                         | | ||||||
|  | | ---------- | ------------------------------------------------------------------------------------------------------------------- | | ||||||
|  | | `examples` | The `Example` objects holding both the predictions and the correct gold-standard annotations. ~~Iterable[Example]~~ | | ||||||
|  |  | ||||||
|  | @ -61,10 +61,12 @@ shortcut for this and instantiate the component using its string name and | ||||||
| [`nlp.add_pipe`](/api/language#add_pipe). | [`nlp.add_pipe`](/api/language#add_pipe). | ||||||
| 
 | 
 | ||||||
| | Name           | Description                                                                                                                         | | | Name           | Description                                                                                                                         | | ||||||
| | ------- | -------------------------------------------------------------------------------------------------------------------- | | | -------------- | ----------------------------------------------------------------------------------------------------------------------------------- | | ||||||
| | `vocab`        | The shared vocabulary. ~~Vocab~~                                                                                                    | | | `vocab`        | The shared vocabulary. ~~Vocab~~                                                                                                    | | ||||||
| | `model`        | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. ~~Model[List[Doc], List[Floats2d]]~~                | | | `model`        | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. ~~Model[List[Doc], List[Floats2d]]~~                | | ||||||
| | `name`         | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~                                 | | | `name`         | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~                                 | | ||||||
|  | | _keyword-only_ |                                                                                                                                     | | ||||||
|  | | `scorer`       | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for the attribute `"sents"`. ~~Optional[Callable]~~ | | ||||||
| 
 | 
 | ||||||
| ## SentenceRecognizer.\_\_call\_\_ {#call tag="method"} | ## SentenceRecognizer.\_\_call\_\_ {#call tag="method"} | ||||||
| 
 | 
 | ||||||
|  | @ -238,21 +240,6 @@ predicted scores. | ||||||
| | `scores`    | Scores representing the model's predictions.                                | | | `scores`    | Scores representing the model's predictions.                                | | ||||||
| | **RETURNS** | The loss and the gradient, i.e. `(loss, gradient)`. ~~Tuple[float, float]~~ | | | **RETURNS** | The loss and the gradient, i.e. `(loss, gradient)`. ~~Tuple[float, float]~~ | | ||||||
| 
 | 
 | ||||||
| ## SentenceRecognizer.score {#score tag="method" new="3"} |  | ||||||
| 
 |  | ||||||
| Score a batch of examples. |  | ||||||
| 
 |  | ||||||
| > #### Example |  | ||||||
| > |  | ||||||
| > ```python |  | ||||||
| > scores = senter.score(examples) |  | ||||||
| > ``` |  | ||||||
| 
 |  | ||||||
| | Name        | Description                                                                                                                                               | |  | ||||||
| | ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | |  | ||||||
| | `examples`  | The examples to score. ~~Iterable[Example]~~                                                                                                              | |  | ||||||
| | **RETURNS** | The scores, produced by [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"pos"`, `"tag"` and `"lemma"`. ~~Dict[str, float]~~ | |  | ||||||
| 
 |  | ||||||
| ## SentenceRecognizer.create_optimizer {#create_optimizer tag="method"} | ## SentenceRecognizer.create_optimizer {#create_optimizer tag="method"} | ||||||
| 
 | 
 | ||||||
| Create an optimizer for the pipeline component. | Create an optimizer for the pipeline component. | ||||||
|  |  | ||||||
|  | @ -28,7 +28,7 @@ how the component should be configured. You can override its settings via the | ||||||
| > ``` | > ``` | ||||||
| 
 | 
 | ||||||
| | Setting       | Description                                                                                                                                            | | | Setting       | Description                                                                                                                                            | | ||||||
| | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------ | ------ | | | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------ | | ||||||
| | `punct_chars` | Optional custom list of punctuation characters that mark sentence ends. See below for defaults if not set. Defaults to `None`. ~~Optional[List[str]]~~ | | | `punct_chars` | Optional custom list of punctuation characters that mark sentence ends. See below for defaults if not set. Defaults to `None`. ~~Optional[List[str]]~~ | | ||||||
| 
 | 
 | ||||||
| ```python | ```python | ||||||
|  | @ -51,9 +51,10 @@ Initialize the sentencizer. | ||||||
| > ``` | > ``` | ||||||
| 
 | 
 | ||||||
| | Name           | Description                                                                                                                        | | | Name           | Description                                                                                                                        | | ||||||
| | -------------- | ----------------------------------------------------------------------------------------------------------------------- | | | -------------- | ---------------------------------------------------------------------------------------------------------------------------------- | | ||||||
| | _keyword-only_ |                                                                                                                                    | | | _keyword-only_ |                                                                                                                                    | | ||||||
| | `punct_chars`  | Optional custom list of punctuation characters that mark sentence ends. See below for defaults. ~~Optional[List[str]]~~            | | | `punct_chars`  | Optional custom list of punctuation characters that mark sentence ends. See below for defaults. ~~Optional[List[str]]~~            | | ||||||
|  | | `scorer`       | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for the attribute `"sents"`. ~~Optional[Callable]~~ | | ||||||
| 
 | 
 | ||||||
| ```python | ```python | ||||||
| ### punct_chars defaults | ### punct_chars defaults | ||||||
|  | @ -112,21 +113,6 @@ applied to the `Doc` in order. | ||||||
| | `batch_size`   | The number of documents to buffer. Defaults to `128`. ~~int~~ | | | `batch_size`   | The number of documents to buffer. Defaults to `128`. ~~int~~ | | ||||||
| | **YIELDS**     | The processed documents in order. ~~Doc~~                     | | | **YIELDS**     | The processed documents in order. ~~Doc~~                     | | ||||||
| 
 | 
 | ||||||
| ## Sentencizer.score {#score tag="method" new="3"} |  | ||||||
| 
 |  | ||||||
| Score a batch of examples. |  | ||||||
| 
 |  | ||||||
| > #### Example |  | ||||||
| > |  | ||||||
| > ```python |  | ||||||
| > scores = sentencizer.score(examples) |  | ||||||
| > ``` |  | ||||||
| 
 |  | ||||||
| | Name        | Description                                                                                                           | |  | ||||||
| | ----------- | --------------------------------------------------------------------------------------------------------------------- | |  | ||||||
| | `examples`  | The examples to score. ~~Iterable[Example]~~                                                                          | |  | ||||||
| | **RETURNS** | The scores, produced by [`Scorer.score_spans`](/api/scorer#score_spans). ~~Dict[str, Union[float, Dict[str, float]]~~ | |  | ||||||
| 
 |  | ||||||
| ## Sentencizer.to_disk {#to_disk tag="method"} | ## Sentencizer.to_disk {#to_disk tag="method"} | ||||||
| 
 | 
 | ||||||
| Save the sentencizer settings (punctuation characters) to a directory. Will | Save the sentencizer settings (punctuation characters) to a directory. Will | ||||||
|  |  | ||||||
|  | @ -43,6 +43,7 @@ architectures and their arguments and hyperparameters. | ||||||
| | `spans_key`    | Key of the [`Doc.spans`](/api/doc#spans) dict to save the spans under. During initialization and training, the component will look for spans on the reference document under the same key. Defaults to `"spans"`. ~~str~~                                                                               | | | `spans_key`    | Key of the [`Doc.spans`](/api/doc#spans) dict to save the spans under. During initialization and training, the component will look for spans on the reference document under the same key. Defaults to `"spans"`. ~~str~~                                                                               | | ||||||
| | `threshold`    | Minimum probability to consider a prediction positive. Spans with a positive prediction will be saved on the Doc. Defaults to `0.5`. ~~float~~                                                                                                                                                          | | | `threshold`    | Minimum probability to consider a prediction positive. Spans with a positive prediction will be saved on the Doc. Defaults to `0.5`. ~~float~~                                                                                                                                                          | | ||||||
| | `max_positive` | Maximum number of labels to consider positive per span. Defaults to `None`, indicating no limit. ~~Optional[int]~~                                                                                                                                                                                      | | | `max_positive` | Maximum number of labels to consider positive per span. Defaults to `None`, indicating no limit. ~~Optional[int]~~                                                                                                                                                                                      | | ||||||
|  | | `scorer`       | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for `Doc.spans[spans_key]` with overlapping spans allowed. ~~Optional[Callable]~~                                                                                                                                       | | ||||||
| 
 | 
 | ||||||
| ```python | ```python | ||||||
| %%GITHUB_SPACY/spacy/pipeline/spancat.py | %%GITHUB_SPACY/spacy/pipeline/spancat.py | ||||||
|  | @ -241,22 +242,6 @@ predicted scores. | ||||||
| | `scores`    | Scores representing the model's predictions.                                | | | `scores`    | Scores representing the model's predictions.                                | | ||||||
| | **RETURNS** | The loss and the gradient, i.e. `(loss, gradient)`. ~~Tuple[float, float]~~ | | | **RETURNS** | The loss and the gradient, i.e. `(loss, gradient)`. ~~Tuple[float, float]~~ | | ||||||
| 
 | 
 | ||||||
| ## SpanCategorizer.score {#score tag="method"} |  | ||||||
| 
 |  | ||||||
| Score a batch of examples. |  | ||||||
| 
 |  | ||||||
| > #### Example |  | ||||||
| > |  | ||||||
| > ```python |  | ||||||
| > scores = spancat.score(examples) |  | ||||||
| > ``` |  | ||||||
| 
 |  | ||||||
| | Name           | Description                                                                                                            | |  | ||||||
| | -------------- | ---------------------------------------------------------------------------------------------------------------------- | |  | ||||||
| | `examples`     | The examples to score. ~~Iterable[Example]~~                                                                           | |  | ||||||
| | _keyword-only_ |                                                                                                                        | |  | ||||||
| | **RETURNS**    | The scores, produced by [`Scorer.score_spans`](/api/scorer#score_spans). ~~Dict[str, Union[float, Dict[str, float]]]~~ | |  | ||||||
| 
 |  | ||||||
| ## SpanCategorizer.create_optimizer {#create_optimizer tag="method"} | ## SpanCategorizer.create_optimizer {#create_optimizer tag="method"} | ||||||
| 
 | 
 | ||||||
| Create an optimizer for the pipeline component. | Create an optimizer for the pipeline component. | ||||||
|  |  | ||||||
|  | @ -55,10 +55,12 @@ shortcut for this and instantiate the component using its string name and | ||||||
| [`nlp.add_pipe`](/api/language#add_pipe). | [`nlp.add_pipe`](/api/language#add_pipe). | ||||||
| 
 | 
 | ||||||
| | Name           | Description                                                                                                                                                                                                                                           | | | Name           | Description                                                                                                                                                                                                                                           | | ||||||
| | ------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | | -------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ||||||
| | `vocab`        | The shared vocabulary. ~~Vocab~~                                                                                                                                                                                                                      | | | `vocab`        | The shared vocabulary. ~~Vocab~~                                                                                                                                                                                                                      | | ||||||
| | `model`        | A model instance that predicts the tag probabilities. The output vectors should match the number of tags in size, and be normalized as probabilities (all scores between 0 and 1, with the rows summing to `1`). ~~Model[List[Doc], List[Floats2d]]~~ | | | `model`        | A model instance that predicts the tag probabilities. The output vectors should match the number of tags in size, and be normalized as probabilities (all scores between 0 and 1, with the rows summing to `1`). ~~Model[List[Doc], List[Floats2d]]~~ | | ||||||
| | `name`         | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~                                                                                                                                                   | | | `name`         | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~                                                                                                                                                   | | ||||||
|  | | _keyword-only_ |                                                                                                                                                                                                                                                       | | ||||||
|  | | `scorer`       | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attribute `"tag"`. ~~Optional[Callable]~~                                                                                                           | | ||||||
| 
 | 
 | ||||||
| ## Tagger.\_\_call\_\_ {#call tag="method"} | ## Tagger.\_\_call\_\_ {#call tag="method"} | ||||||
| 
 | 
 | ||||||
|  | @ -249,21 +251,6 @@ predicted scores. | ||||||
| | `scores`    | Scores representing the model's predictions.                                | | | `scores`    | Scores representing the model's predictions.                                | | ||||||
| | **RETURNS** | The loss and the gradient, i.e. `(loss, gradient)`. ~~Tuple[float, float]~~ | | | **RETURNS** | The loss and the gradient, i.e. `(loss, gradient)`. ~~Tuple[float, float]~~ | | ||||||
| 
 | 
 | ||||||
| ## Tagger.score {#score tag="method" new="3"} |  | ||||||
| 
 |  | ||||||
| Score a batch of examples. |  | ||||||
| 
 |  | ||||||
| > #### Example |  | ||||||
| > |  | ||||||
| > ```python |  | ||||||
| > scores = tagger.score(examples) |  | ||||||
| > ``` |  | ||||||
| 
 |  | ||||||
| | Name        | Description                                                                                                                       | |  | ||||||
| | ----------- | --------------------------------------------------------------------------------------------------------------------------------- | |  | ||||||
| | `examples`  | The examples to score. ~~Iterable[Example]~~                                                                                      | |  | ||||||
| | **RETURNS** | The scores, produced by [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attribute `"tag"`. ~~Dict[str, float]~~ | |  | ||||||
| 
 |  | ||||||
| ## Tagger.create_optimizer {#create_optimizer tag="method"} | ## Tagger.create_optimizer {#create_optimizer tag="method"} | ||||||
| 
 | 
 | ||||||
| Create an optimizer for the pipeline component. | Create an optimizer for the pipeline component. | ||||||
|  |  | ||||||
|  | @ -97,12 +97,13 @@ shortcut for this and instantiate the component using its string name and | ||||||
| [`nlp.add_pipe`](/api/language#add_pipe). | [`nlp.add_pipe`](/api/language#add_pipe). | ||||||
| 
 | 
 | ||||||
| | Name           | Description                                                                                                                      | | | Name           | Description                                                                                                                      | | ||||||
| | -------------- | -------------------------------------------------------------------------------------------------------------------------- | | | -------------- | -------------------------------------------------------------------------------------------------------------------------------- | | ||||||
| | `vocab`        | The shared vocabulary. ~~Vocab~~                                                                                                 | | | `vocab`        | The shared vocabulary. ~~Vocab~~                                                                                                 | | ||||||
| | `model`        | The Thinc [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. ~~Model[List[Doc], List[Floats2d]]~~       | | | `model`        | The Thinc [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. ~~Model[List[Doc], List[Floats2d]]~~       | | ||||||
| | `name`         | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~                              | | | `name`         | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~                              | | ||||||
| | _keyword-only_ |                                                                                                                                  | | | _keyword-only_ |                                                                                                                                  | | ||||||
| | `threshold`    | Cutoff to consider a prediction "positive", relevant when printing accuracy results. ~~float~~                                   | | | `threshold`    | Cutoff to consider a prediction "positive", relevant when printing accuracy results. ~~float~~                                   | | ||||||
|  | | `scorer`       | The scoring method. Defaults to [`Scorer.score_cats`](/api/scorer#score_cats) for the attribute `"cats"`. ~~Optional[Callable]~~ | | ||||||
| 
 | 
 | ||||||
| ## TextCategorizer.\_\_call\_\_ {#call tag="method"} | ## TextCategorizer.\_\_call\_\_ {#call tag="method"} | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -373,6 +373,7 @@ factories. | ||||||
| | `optimizers`      | Registry for functions that create [optimizers](https://thinc.ai/docs/api-optimizers).                                                                                                                                                             | | | `optimizers`      | Registry for functions that create [optimizers](https://thinc.ai/docs/api-optimizers).                                                                                                                                                             | | ||||||
| | `readers`         | Registry for file and data readers, including training and evaluation data readers like [`Corpus`](/api/corpus).                                                                                                                                   | | | `readers`         | Registry for file and data readers, including training and evaluation data readers like [`Corpus`](/api/corpus).                                                                                                                                   | | ||||||
| | `schedules`       | Registry for functions that create [schedules](https://thinc.ai/docs/api-schedules).                                                                                                                                                               | | | `schedules`       | Registry for functions that create [schedules](https://thinc.ai/docs/api-schedules).                                                                                                                                                               | | ||||||
|  | | `scorers`         | Registry for functions that create scoring methods for use with the [`Scorer`](/api/scorer). Scoring methods are called with `Iterable[Example]` and arbitrary `**kwargs` and return scores as `Dict[str, Any]`.                                     | | ||||||
| | `tokenizers`      | Registry for tokenizer factories. Registered functions should return a callback that receives the `nlp` object and returns a [`Tokenizer`](/api/tokenizer) or a custom callable.                                                                   | | | `tokenizers`      | Registry for tokenizer factories. Registered functions should return a callback that receives the `nlp` object and returns a [`Tokenizer`](/api/tokenizer) or a custom callable.                                                                   | | ||||||
| 
 | 
 | ||||||
| ### spacy-transformers registry {#registry-transformers} | ### spacy-transformers registry {#registry-transformers} | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user