mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 07:57:35 +03:00 
			
		
		
		
	* Document scorers in registry and components from #8766 * Update spacy/pipeline/lemmatizer.py Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com> * Update website/docs/api/dependencyparser.md Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com> * Reformat Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
This commit is contained in:
		
							parent
							
								
									944ad6b1d4
								
							
						
					
					
						commit
						b278f31ee6
					
				|  | @ -36,9 +36,7 @@ def make_attribute_ruler( | |||
|     return AttributeRuler(nlp.vocab, name, validate=validate, scorer=scorer) | ||||
| 
 | ||||
| 
 | ||||
| def attribute_ruler_score( | ||||
|     examples: Iterable[Example], **kwargs | ||||
| ) -> Dict[str, Any]: | ||||
| def attribute_ruler_score(examples: Iterable[Example], **kwargs) -> Dict[str, Any]: | ||||
|     def morph_key_getter(token, attr): | ||||
|         return getattr(token, attr).key | ||||
| 
 | ||||
|  | @ -84,6 +82,10 @@ class AttributeRuler(Pipe): | |||
| 
 | ||||
|         vocab (Vocab): The vocab. | ||||
|         name (str): The pipe name. Defaults to "attribute_ruler". | ||||
|         scorer (Optional[Callable]): The scoring method. Defaults to | ||||
|             Scorer.score_token_attr for the attributes "tag", "pos", "morph" and | ||||
|             "lemma" and Scorer.score_token_attr_per_feat for the attribute | ||||
|             "morph". | ||||
| 
 | ||||
|         RETURNS (AttributeRuler): The AttributeRuler component. | ||||
| 
 | ||||
|  |  | |||
|  | @ -102,6 +102,7 @@ def make_parser( | |||
|         primarily affects the label accuracy, it can also affect the attachment | ||||
|         structure, as the labels are used to represent the pseudo-projectivity | ||||
|         transformation. | ||||
|     scorer (Optional[Callable]): The scoring method. | ||||
|     """ | ||||
|     return DependencyParser( | ||||
|         nlp.vocab, | ||||
|  |  | |||
|  | @ -83,6 +83,7 @@ def make_entity_linker( | |||
|     entity_vector_length (int): Size of encoding vectors in the KB. | ||||
|     get_candidates (Callable[[KnowledgeBase, "Span"], Iterable[Candidate]]): Function that | ||||
|         produces a list of candidates, given a certain knowledge base and a textual mention. | ||||
|     scorer (Optional[Callable]): The scoring method. | ||||
|     """ | ||||
|     return EntityLinker( | ||||
|         nlp.vocab, | ||||
|  | @ -142,6 +143,8 @@ class EntityLinker(TrainablePipe): | |||
|         entity_vector_length (int): Size of encoding vectors in the KB. | ||||
|         get_candidates (Callable[[KnowledgeBase, Span], Iterable[Candidate]]): Function that | ||||
|             produces a list of candidates, given a certain knowledge base and a textual mention. | ||||
|         scorer (Optional[Callable]): The scoring method. Defaults to | ||||
|             Scorer.score_links. | ||||
| 
 | ||||
|         DOCS: https://spacy.io/api/entitylinker#init | ||||
|         """ | ||||
|  |  | |||
|  | @ -106,6 +106,8 @@ class EntityRuler(Pipe): | |||
|         overwrite_ents (bool): If existing entities are present, e.g. entities | ||||
|             added by the model, overwrite them by matches if necessary. | ||||
|         ent_id_sep (str): Separator used internally for entity IDs. | ||||
|         scorer (Optional[Callable]): The scoring method. Defaults to | ||||
|             spacy.scorer.get_ner_prf. | ||||
| 
 | ||||
|         DOCS: https://spacy.io/api/entityruler#init | ||||
|         """ | ||||
|  |  | |||
|  | @ -90,6 +90,8 @@ class Lemmatizer(Pipe): | |||
|         mode (str): The lemmatizer mode: "lookup", "rule". Defaults to "lookup". | ||||
|         overwrite (bool): Whether to overwrite existing lemmas. Defaults to | ||||
|             `False`. | ||||
|         scorer (Optional[Callable]): The scoring method. Defaults to | ||||
|             Scorer.score_token_attr for the attribute "lemma". | ||||
| 
 | ||||
|         DOCS: https://spacy.io/api/lemmatizer#init | ||||
|         """ | ||||
|  |  | |||
|  | @ -95,6 +95,9 @@ class Morphologizer(Tagger): | |||
|         model (thinc.api.Model): The Thinc Model powering the pipeline component. | ||||
|         name (str): The component instance name, used to add entries to the | ||||
|             losses during training. | ||||
|         scorer (Optional[Callable]): The scoring method. Defaults to | ||||
|             Scorer.score_token_attr for the attributes "pos" and "morph" and | ||||
|             Scorer.score_token_attr_per_feat for the attribute "morph". | ||||
| 
 | ||||
|         DOCS: https://spacy.io/api/morphologizer#init | ||||
|         """ | ||||
|  |  | |||
|  | @ -82,6 +82,7 @@ def make_ner( | |||
|     incorrect_spans_key (Optional[str]): Identifies spans that are known | ||||
|         to be incorrect entity annotations. The incorrect entity annotations | ||||
|         can be stored in the span group, under this key. | ||||
|     scorer (Optional[Callable]): The scoring method. | ||||
|     """ | ||||
|     return EntityRecognizer( | ||||
|         nlp.vocab, | ||||
|  | @ -158,6 +159,7 @@ def make_beam_ner( | |||
|         and are faster to compute. | ||||
|     incorrect_spans_key (Optional[str]): Optional key into span groups of | ||||
|         entities known to be non-entities. | ||||
|     scorer (Optional[Callable]): The scoring method. | ||||
|     """ | ||||
|     return EntityRecognizer( | ||||
|         nlp.vocab, | ||||
|  |  | |||
|  | @ -55,7 +55,8 @@ class Sentencizer(Pipe): | |||
| 
 | ||||
|         punct_chars (list): Punctuation characters to split on. Will be | ||||
|             serialized with the nlp object. | ||||
|         RETURNS (Sentencizer): The sentencizer component. | ||||
|         scorer (Optional[Callable]): The scoring method. Defaults to | ||||
|             Scorer.score_spans for the attribute "sents". | ||||
| 
 | ||||
|         DOCS: https://spacy.io/api/sentencizer#init | ||||
|         """ | ||||
|  |  | |||
|  | @ -69,6 +69,8 @@ class SentenceRecognizer(Tagger): | |||
|         model (thinc.api.Model): The Thinc Model powering the pipeline component. | ||||
|         name (str): The component instance name, used to add entries to the | ||||
|             losses during training. | ||||
|         scorer (Optional[Callable]): The scoring method. Defaults to | ||||
|             Scorer.score_spans for the attribute "sents". | ||||
| 
 | ||||
|         DOCS: https://spacy.io/api/sentencerecognizer#init | ||||
|         """ | ||||
|  |  | |||
|  | @ -181,6 +181,22 @@ class SpanCategorizer(TrainablePipe): | |||
|         scorer: Optional[Callable] = spancat_score, | ||||
|     ) -> None: | ||||
|         """Initialize the span categorizer. | ||||
|         vocab (Vocab): The shared vocabulary. | ||||
|         model (thinc.api.Model): The Thinc Model powering the pipeline component. | ||||
|         name (str): The component instance name, used to add entries to the | ||||
|             losses during training. | ||||
|         spans_key (str): Key of the Doc.spans dict to save the spans under. | ||||
|             During initialization and training, the component will look for | ||||
|             spans on the reference document under the same key. Defaults to | ||||
|             `"spans"`. | ||||
|         threshold (float): Minimum probability to consider a prediction | ||||
|             positive. Spans with a positive prediction will be saved on the Doc. | ||||
|             Defaults to 0.5. | ||||
|         max_positive (Optional[int]): Maximum number of labels to consider | ||||
|             positive per span. Defaults to None, indicating no limit. | ||||
|         scorer (Optional[Callable]): The scoring method. Defaults to | ||||
|             Scorer.score_spans for the Doc.spans[spans_key] with overlapping | ||||
|             spans allowed. | ||||
| 
 | ||||
|         DOCS: https://spacy.io/api/spancategorizer#init | ||||
|         """ | ||||
|  |  | |||
|  | @ -78,6 +78,8 @@ class Tagger(TrainablePipe): | |||
|         model (thinc.api.Model): The Thinc Model powering the pipeline component. | ||||
|         name (str): The component instance name, used to add entries to the | ||||
|             losses during training. | ||||
|         scorer (Optional[Callable]): The scoring method. Defaults to | ||||
|             Scorer.score_token_attr for the attribute "tag". | ||||
| 
 | ||||
|         DOCS: https://spacy.io/api/tagger#init | ||||
|         """ | ||||
|  |  | |||
|  | @ -104,6 +104,7 @@ def make_textcat( | |||
|     model (Model[List[Doc], List[Floats2d]]): A model instance that predicts | ||||
|         scores for each category. | ||||
|     threshold (float): Cutoff to consider a prediction "positive". | ||||
|     scorer (Optional[Callable]): The scoring method. | ||||
|     """ | ||||
|     return TextCategorizer(nlp.vocab, model, name, threshold=threshold, scorer=scorer) | ||||
| 
 | ||||
|  | @ -144,6 +145,8 @@ class TextCategorizer(TrainablePipe): | |||
|         name (str): The component instance name, used to add entries to the | ||||
|             losses during training. | ||||
|         threshold (float): Cutoff to consider a prediction "positive". | ||||
|         scorer (Optional[Callable]): The scoring method. Defaults to | ||||
|             Scorer.score_cats for the attribute "cats". | ||||
| 
 | ||||
|         DOCS: https://spacy.io/api/textcategorizer#init | ||||
|         """ | ||||
|  |  | |||
|  | @ -87,6 +87,7 @@ cdef class Parser(TrainablePipe): | |||
|         incorrect_spans_key (Optional[str]): Identifies spans that are known | ||||
|             to be incorrect entity annotations. The incorrect entity annotations | ||||
|             can be stored in the span group, under this key. | ||||
|         scorer (Optional[Callable]): The scoring method. Defaults to None. | ||||
|         """ | ||||
|         self.vocab = vocab | ||||
|         self.name = name | ||||
|  |  | |||
|  | @ -49,11 +49,12 @@ Initialize the attribute ruler. | |||
| > ``` | ||||
| 
 | ||||
| | Name           | Description                                                                                                                                                                                                                                                                                | | ||||
| | -------------- | ---------------------------------------------------------------------------------------------------------------------------------------- | | ||||
| | -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | ||||
| | `vocab`        | The shared vocabulary to pass to the matcher. ~~Vocab~~                                                                                                                                                                                                                                    | | ||||
| | `name`         | Instance name of the current pipeline component. Typically passed in automatically from the factory when the component is added. ~~str~~                                                                                                                                                   | | ||||
| | _keyword-only_ |                                                                                                                                                                                                                                                                                            | | ||||
| | `validate`     | Whether patterns should be validated (passed to the [`Matcher`](/api/matcher#init)). Defaults to `False`. ~~bool~~                                                                                                                                                                         | | ||||
| | `scorer`       | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"tag"`, `"pos"`, `"morph"` and `"lemma"` and [`Scorer.score_token_attr_per_feat`](/api/scorer#score_token_attr_per_feat) for the attribute `"morph"`. ~~Optional[Callable]~~ | | ||||
| 
 | ||||
| ## AttributeRuler.\_\_call\_\_ {#call tag="method"} | ||||
| 
 | ||||
|  | @ -175,21 +176,6 @@ Load attribute ruler patterns from morph rules. | |||
| | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ||||
| | `morph_rules` | The morph rules that map token text and fine-grained tags to coarse-grained tags, lemmas and morphological features. ~~Dict[str, Dict[str, Dict[Union[int, str], Union[int, str]]]]~~ | | ||||
| 
 | ||||
| ## AttributeRuler.score {#score tag="method" new="3"} | ||||
| 
 | ||||
| Score a batch of examples. | ||||
| 
 | ||||
| > #### Example | ||||
| > | ||||
| > ```python | ||||
| > scores = ruler.score(examples) | ||||
| > ``` | ||||
| 
 | ||||
| | Name        | Description                                                                                                                                                                                                           | | ||||
| | ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ||||
| | `examples`  | The examples to score. ~~Iterable[Example]~~                                                                                                                                                                          | | ||||
| | **RETURNS** | The scores, produced by [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"tag"`, `"pos"`, `"morph"` and `"lemma"` if present in any of the target token attributes. ~~Dict[str, float]~~ | | ||||
| 
 | ||||
| ## AttributeRuler.to_disk {#to_disk tag="method"} | ||||
| 
 | ||||
| Serialize the pipe to disk. | ||||
|  |  | |||
|  | @ -91,6 +91,7 @@ shortcut for this and instantiate the component using its string name and | |||
| | `update_with_oracle_cut_size` | During training, cut long sequences into shorter segments by creating intermediate states based on the gold-standard history. The model is not very sensitive to this parameter, so you usually won't need to change it. Defaults to `100`. ~~int~~                                                 | | ||||
| | `learn_tokens`                | Whether to learn to merge subtokens that are split relative to the gold standard. Experimental. Defaults to `False`. ~~bool~~                                                                                                                                                                       | | ||||
| | `min_action_freq`             | The minimum frequency of labelled actions to retain. Rarer labelled actions have their label backed-off to "dep". While this primarily affects the label accuracy, it can also affect the attachment structure, as the labels are used to represent the pseudo-projectivity transformation. ~~int~~ | | ||||
| | `scorer`                      | The scoring method. Defaults to [`Scorer.score_deps`](/api/scorer#score_deps) for the attribute `"dep"` ignoring the labels `p` and `punct` and [`Scorer.score_spans`](/api/scorer#score_spans) for the attribute `"sents"`. ~~Optional[Callable]~~                                                 | | ||||
| 
 | ||||
| ## DependencyParser.\_\_call\_\_ {#call tag="method"} | ||||
| 
 | ||||
|  | @ -259,21 +260,6 @@ predicted scores. | |||
| | `scores`    | Scores representing the model's predictions. ~~StateClass~~                 | | ||||
| | **RETURNS** | The loss and the gradient, i.e. `(loss, gradient)`. ~~Tuple[float, float]~~ | | ||||
| 
 | ||||
| ## DependencyParser.score {#score tag="method" new="3"} | ||||
| 
 | ||||
| Score a batch of examples. | ||||
| 
 | ||||
| > #### Example | ||||
| > | ||||
| > ```python | ||||
| > scores = parser.score(examples) | ||||
| > ``` | ||||
| 
 | ||||
| | Name        | Description                                                                                                                                                              | | ||||
| | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | ||||
| | `examples`  | The examples to score. ~~Iterable[Example]~~                                                                                                                             | | ||||
| | **RETURNS** | The scores, produced by [`Scorer.score_spans`](/api/scorer#score_spans) and [`Scorer.score_deps`](/api/scorer#score_deps). ~~Dict[str, Union[float, Dict[str, float]]]~~ | | ||||
| 
 | ||||
| ## DependencyParser.create_optimizer {#create_optimizer tag="method"} | ||||
| 
 | ||||
| Create an [`Optimizer`](https://thinc.ai/docs/api-optimizers) for the pipeline | ||||
|  |  | |||
|  | @ -50,6 +50,7 @@ architectures and their arguments and hyperparameters. | |||
| | `model`                | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. Defaults to [EntityLinker](/api/architectures#EntityLinker). ~~Model~~                                                                                                                   | | ||||
| | `entity_vector_length` | Size of encoding vectors in the KB. Defaults to `64`. ~~int~~                                                                                                                                                                                                            | | ||||
| | `get_candidates`       | Function that generates plausible candidates for a given `Span` object. Defaults to [CandidateGenerator](/api/architectures#CandidateGenerator), a function looking up exact, case-dependent aliases in the KB. ~~Callable[[KnowledgeBase, Span], Iterable[Candidate]]~~ | | ||||
| | `scorer`               | The scoring method. Defaults to [`Scorer.score_links`](/api/scorer#score_links). ~~Optional[Callable]~~                                                                                                                                                                  | | ||||
| 
 | ||||
| ```python | ||||
| %%GITHUB_SPACY/spacy/pipeline/entity_linker.py | ||||
|  | @ -259,21 +260,6 @@ pipe's entity linking model and context encoder. Delegates to | |||
| | `losses`       | Optional record of the loss during training. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ | | ||||
| | **RETURNS**    | The updated `losses` dictionary. ~~Dict[str, float]~~                                                                    | | ||||
| 
 | ||||
| ## EntityLinker.score {#score tag="method" new="3"} | ||||
| 
 | ||||
| Score a batch of examples. | ||||
| 
 | ||||
| > #### Example | ||||
| > | ||||
| > ```python | ||||
| > scores = entity_linker.score(examples) | ||||
| > ``` | ||||
| 
 | ||||
| | Name        | Description                                                                                    | | ||||
| | ----------- | ---------------------------------------------------------------------------------------------- | | ||||
| | `examples`  | The examples to score. ~~Iterable[Example]~~                                                   | | ||||
| | **RETURNS** | The scores, produced by [`Scorer.score_links`](/api/scorer#score_links) . ~~Dict[str, float]~~ | | ||||
| 
 | ||||
| ## EntityLinker.create_optimizer {#create_optimizer tag="method"} | ||||
| 
 | ||||
| Create an optimizer for the pipeline component. | ||||
|  |  | |||
|  | @ -48,6 +48,7 @@ architectures and their arguments and hyperparameters. | |||
| | `update_with_oracle_cut_size` | During training, cut long sequences into shorter segments by creating intermediate states based on the gold-standard history. The model is not very sensitive to this parameter, so you usually won't need to change it. Defaults to `100`. ~~int~~ | | ||||
| | `model`                       | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. Defaults to [TransitionBasedParser](/api/architectures#TransitionBasedParser). ~~Model[List[Doc], List[Floats2d]]~~                                                 | | ||||
| | `incorrect_spans_key`         | This key refers to a `SpanGroup` in `doc.spans` that specifies incorrect spans. The NER will learn not to predict (exactly) those spans. Defaults to `None`. ~~Optional[str]~~                                                                      | | ||||
| | `scorer`                      | The scoring method. Defaults to [`spacy.scorer.get_ner_prf`](/api/scorer#get_ner_prf). ~~Optional[Callable]~~                                                                                                                                       | | ||||
| 
 | ||||
| ```python | ||||
| %%GITHUB_SPACY/spacy/pipeline/ner.pyx | ||||
|  | @ -251,21 +252,6 @@ predicted scores. | |||
| | `scores`    | Scores representing the model's predictions. ~~StateClass~~                 | | ||||
| | **RETURNS** | The loss and the gradient, i.e. `(loss, gradient)`. ~~Tuple[float, float]~~ | | ||||
| 
 | ||||
| ## EntityRecognizer.score {#score tag="method" new="3"} | ||||
| 
 | ||||
| Score a batch of examples. | ||||
| 
 | ||||
| > #### Example | ||||
| > | ||||
| > ```python | ||||
| > scores = ner.score(examples) | ||||
| > ``` | ||||
| 
 | ||||
| | Name        | Description                                               | | ||||
| | ----------- | --------------------------------------------------------- | | ||||
| | `examples`  | The examples to score. ~~Iterable[Example]~~              | | ||||
| | **RETURNS** | The scores. ~~Dict[str, Union[float, Dict[str, float]]]~~ | | ||||
| 
 | ||||
| ## EntityRecognizer.create_optimizer {#create_optimizer tag="method"} | ||||
| 
 | ||||
| Create an optimizer for the pipeline component. | ||||
|  |  | |||
|  | @ -40,6 +40,7 @@ how the component should be configured. You can override its settings via the | |||
| | `validate`            | Whether patterns should be validated (passed to the `Matcher` and `PhraseMatcher`). Defaults to `False`. ~~bool~~                                                                             | | ||||
| | `overwrite_ents`      | If existing entities are present, e.g. entities added by the model, overwrite them by matches if necessary. Defaults to `False`. ~~bool~~                                                     | | ||||
| | `ent_id_sep`          | Separator used internally for entity IDs. Defaults to `"\|\|"`. ~~str~~                                                                                                                       | | ||||
| | `scorer`              | The scoring method. Defaults to [`spacy.scorer.get_ner_prf`](/api/scorer#get_ner_prf). ~~Optional[Callable]~~                                                                                 | | ||||
| 
 | ||||
| ```python | ||||
| %%GITHUB_SPACY/spacy/pipeline/entityruler.py | ||||
|  |  | |||
|  | @ -48,10 +48,12 @@ data format used by the lookup and rule-based lemmatizers, see | |||
| > ``` | ||||
| 
 | ||||
| | Setting        | Description                                                                                                                                               | | ||||
| | ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | | ||||
| | -------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | | ||||
| | `mode`         | The lemmatizer mode, e.g. `"lookup"` or `"rule"`. Defaults to `lookup` if no language-specific lemmatizer is available (see the following table). ~~str~~ | | ||||
| | `overwrite`    | Whether to overwrite existing lemmas. Defaults to `False`. ~~bool~~                                                                                       | | ||||
| | `model`        | **Not yet implemented:** the model to use. ~~Model~~                                                                                                      | | ||||
| | _keyword-only_ |                                                                                                                                                           | | ||||
| | `scorer`       | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attribute `"lemma"`. ~~Optional[Callable]~~             | | ||||
| 
 | ||||
| Many languages specify a default lemmatizer mode other than `lookup` if a better | ||||
| lemmatizer is available. The lemmatizer modes `rule` and `pos_lookup` require | ||||
|  |  | |||
|  | @ -62,10 +62,12 @@ shortcut for this and instantiate the component using its string name and | |||
| > ``` | ||||
| 
 | ||||
| | Name           | Description                                                                                                                                                                                                                                                            | | ||||
| | ------- | -------------------------------------------------------------------------------------------------------------------- | | ||||
| | -------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ||||
| | `vocab`        | The shared vocabulary. ~~Vocab~~                                                                                                                                                                                                                                       | | ||||
| | `model`        | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. ~~Model[List[Doc], List[Floats2d]]~~                                                                                                                                                   | | ||||
| | `name`         | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~                                                                                                                                                                    | | ||||
| | _keyword-only_ |                                                                                                                                                                                                                                                                        | | ||||
| | `scorer`       | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"pos"` and `"morph"` and [`Scorer.score_token_attr_per_feat`](/api/scorer#score_token_attr_per_feat) for the attribute `"morph"`. ~~Optional[Callable]~~ | | ||||
| 
 | ||||
| ## Morphologizer.\_\_call\_\_ {#call tag="method"} | ||||
| 
 | ||||
|  |  | |||
|  | @ -298,8 +298,10 @@ Score a batch of examples. | |||
| > ``` | ||||
| 
 | ||||
| | Name           | Description                                                                                             | | ||||
| | ----------- | ------------------------------------------------------------------------------------------------------- | | ||||
| | -------------- | ------------------------------------------------------------------------------------------------------- | | ||||
| | `examples`     | The examples to score. ~~Iterable[Example]~~                                                            | | ||||
| | _keyword-only_ |                                                                                                         | | ||||
| | `\*\*kwargs`   | Any additional settings to pass on to the scorer. ~~Any~~                                               | | ||||
| | **RETURNS**    | The scores, e.g. produced by the [`Scorer`](/api/scorer). ~~Dict[str, Union[float, Dict[str, float]]]~~ | | ||||
| 
 | ||||
| ## TrainablePipe.create_optimizer {#create_optimizer tag="method"} | ||||
|  |  | |||
|  | @ -28,8 +28,12 @@ Create a new `Scorer`. | |||
| > ``` | ||||
| 
 | ||||
| | Name               | Description                                                                                                                                                                                                                               | | ||||
| | ----- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ||||
| | `nlp` | The pipeline to use for scoring, where each pipeline component may provide a scoring method. If none is provided, then a default pipeline for the multi-language code `xx` is constructed containing: `senter`, `tagger`, `morphologizer`, `parser`, `ner`, `textcat`. ~~Language~~ | | ||||
| | ------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ||||
| | `nlp`              | The pipeline to use for scoring, where each pipeline component may provide a scoring method. If none is provided, then a default pipeline is constructed using the `default_lang` and `default_pipeline` settings. ~~Optional[Language]~~ | | ||||
| | `default_lang`     | The language to use for a default pipeline if `nlp` is not provided. Defaults to `xx`. ~~str~~                                                                                                                                            | | ||||
| | `default_pipeline` | The pipeline components to use for a default pipeline if `nlp` is not provided. Defaults to `("senter", "tagger", "morphologizer", "parser", "ner", "textcat")`. ~~Iterable[str]~~                                                        | | ||||
| | _keyword-only_     |                                                                                                                                                                                                                                           | | ||||
| | `\*\*kwargs`       | Any additional settings to pass on to the individual scoring methods. ~~Any~~                                                                                                                                                             | | ||||
| 
 | ||||
| ## Scorer.score {#score tag="method"} | ||||
| 
 | ||||
|  | @ -80,7 +84,7 @@ Docs with `has_unknown_spaces` are skipped during scoring. | |||
| > ``` | ||||
| 
 | ||||
| | Name        | Description                                                                                                         | | ||||
| | ----------- | ------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------ | | ||||
| | ----------- | ------------------------------------------------------------------------------------------------------------------- | | ||||
| | `examples`  | The `Example` objects holding both the predictions and the correct gold-standard annotations. ~~Iterable[Example]~~ | | ||||
| | **RETURNS** | A dictionary containing the scores `token_acc`, `token_p`, `token_r`, `token_f`. ~~Dict[str, float]~~               | | ||||
| 
 | ||||
|  | @ -253,3 +257,11 @@ entities that overlap between the gold reference and the predictions. | |||
| | _keyword-only_    |                                                                                                                     | | ||||
| | `negative_labels` | The string values that refer to no annotation (e.g. "NIL"). ~~Iterable[str]~~                                       | | ||||
| | **RETURNS**       | A dictionary containing the scores. ~~Dict[str, Optional[float]]~~                                                  | | ||||
| 
 | ||||
| ## get_ner_prf {#get_ner_prf new="3"} | ||||
| 
 | ||||
| Compute micro-PRF and per-entity PRF scores. | ||||
| 
 | ||||
| | Name       | Description                                                                                                         | | ||||
| | ---------- | ------------------------------------------------------------------------------------------------------------------- | | ||||
| | `examples` | The `Example` objects holding both the predictions and the correct gold-standard annotations. ~~Iterable[Example]~~ | | ||||
|  |  | |||
|  | @ -61,10 +61,12 @@ shortcut for this and instantiate the component using its string name and | |||
| [`nlp.add_pipe`](/api/language#add_pipe). | ||||
| 
 | ||||
| | Name           | Description                                                                                                                         | | ||||
| | ------- | -------------------------------------------------------------------------------------------------------------------- | | ||||
| | -------------- | ----------------------------------------------------------------------------------------------------------------------------------- | | ||||
| | `vocab`        | The shared vocabulary. ~~Vocab~~                                                                                                    | | ||||
| | `model`        | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. ~~Model[List[Doc], List[Floats2d]]~~                | | ||||
| | `name`         | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~                                 | | ||||
| | _keyword-only_ |                                                                                                                                     | | ||||
| | `scorer`       | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for the attribute `"sents"`. ~~Optional[Callable]~~ | | ||||
| 
 | ||||
| ## SentenceRecognizer.\_\_call\_\_ {#call tag="method"} | ||||
| 
 | ||||
|  | @ -238,21 +240,6 @@ predicted scores. | |||
| | `scores`    | Scores representing the model's predictions.                                | | ||||
| | **RETURNS** | The loss and the gradient, i.e. `(loss, gradient)`. ~~Tuple[float, float]~~ | | ||||
| 
 | ||||
| ## SentenceRecognizer.score {#score tag="method" new="3"} | ||||
| 
 | ||||
| Score a batch of examples. | ||||
| 
 | ||||
| > #### Example | ||||
| > | ||||
| > ```python | ||||
| > scores = senter.score(examples) | ||||
| > ``` | ||||
| 
 | ||||
| | Name        | Description                                                                                                                                               | | ||||
| | ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | | ||||
| | `examples`  | The examples to score. ~~Iterable[Example]~~                                                                                                              | | ||||
| | **RETURNS** | The scores, produced by [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"pos"`, `"tag"` and `"lemma"`. ~~Dict[str, float]~~ | | ||||
| 
 | ||||
| ## SentenceRecognizer.create_optimizer {#create_optimizer tag="method"} | ||||
| 
 | ||||
| Create an optimizer for the pipeline component. | ||||
|  |  | |||
|  | @ -28,7 +28,7 @@ how the component should be configured. You can override its settings via the | |||
| > ``` | ||||
| 
 | ||||
| | Setting       | Description                                                                                                                                            | | ||||
| | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------ | ------ | | ||||
| | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------ | | ||||
| | `punct_chars` | Optional custom list of punctuation characters that mark sentence ends. See below for defaults if not set. Defaults to `None`. ~~Optional[List[str]]~~ | | ||||
| 
 | ||||
| ```python | ||||
|  | @ -51,9 +51,10 @@ Initialize the sentencizer. | |||
| > ``` | ||||
| 
 | ||||
| | Name           | Description                                                                                                                        | | ||||
| | -------------- | ----------------------------------------------------------------------------------------------------------------------- | | ||||
| | -------------- | ---------------------------------------------------------------------------------------------------------------------------------- | | ||||
| | _keyword-only_ |                                                                                                                                    | | ||||
| | `punct_chars`  | Optional custom list of punctuation characters that mark sentence ends. See below for defaults. ~~Optional[List[str]]~~            | | ||||
| | `scorer`       | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for the attribute `"sents"`. ~~Optional[Callable]~~ | | ||||
| 
 | ||||
| ```python | ||||
| ### punct_chars defaults | ||||
|  | @ -112,21 +113,6 @@ applied to the `Doc` in order. | |||
| | `batch_size`   | The number of documents to buffer. Defaults to `128`. ~~int~~ | | ||||
| | **YIELDS**     | The processed documents in order. ~~Doc~~                     | | ||||
| 
 | ||||
| ## Sentencizer.score {#score tag="method" new="3"} | ||||
| 
 | ||||
| Score a batch of examples. | ||||
| 
 | ||||
| > #### Example | ||||
| > | ||||
| > ```python | ||||
| > scores = sentencizer.score(examples) | ||||
| > ``` | ||||
| 
 | ||||
| | Name        | Description                                                                                                           | | ||||
| | ----------- | --------------------------------------------------------------------------------------------------------------------- | | ||||
| | `examples`  | The examples to score. ~~Iterable[Example]~~                                                                          | | ||||
| | **RETURNS** | The scores, produced by [`Scorer.score_spans`](/api/scorer#score_spans). ~~Dict[str, Union[float, Dict[str, float]]~~ | | ||||
| 
 | ||||
| ## Sentencizer.to_disk {#to_disk tag="method"} | ||||
| 
 | ||||
| Save the sentencizer settings (punctuation characters) to a directory. Will | ||||
|  |  | |||
|  | @ -43,6 +43,7 @@ architectures and their arguments and hyperparameters. | |||
| | `spans_key`    | Key of the [`Doc.spans`](/api/doc#spans) dict to save the spans under. During initialization and training, the component will look for spans on the reference document under the same key. Defaults to `"spans"`. ~~str~~                                                                               | | ||||
| | `threshold`    | Minimum probability to consider a prediction positive. Spans with a positive prediction will be saved on the Doc. Defaults to `0.5`. ~~float~~                                                                                                                                                          | | ||||
| | `max_positive` | Maximum number of labels to consider positive per span. Defaults to `None`, indicating no limit. ~~Optional[int]~~                                                                                                                                                                                      | | ||||
| | `scorer`       | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for `Doc.spans[spans_key]` with overlapping spans allowed. ~~Optional[Callable]~~                                                                                                                                       | | ||||
| 
 | ||||
| ```python | ||||
| %%GITHUB_SPACY/spacy/pipeline/spancat.py | ||||
|  | @ -241,22 +242,6 @@ predicted scores. | |||
| | `scores`    | Scores representing the model's predictions.                                | | ||||
| | **RETURNS** | The loss and the gradient, i.e. `(loss, gradient)`. ~~Tuple[float, float]~~ | | ||||
| 
 | ||||
| ## SpanCategorizer.score {#score tag="method"} | ||||
| 
 | ||||
| Score a batch of examples. | ||||
| 
 | ||||
| > #### Example | ||||
| > | ||||
| > ```python | ||||
| > scores = spancat.score(examples) | ||||
| > ``` | ||||
| 
 | ||||
| | Name           | Description                                                                                                            | | ||||
| | -------------- | ---------------------------------------------------------------------------------------------------------------------- | | ||||
| | `examples`     | The examples to score. ~~Iterable[Example]~~                                                                           | | ||||
| | _keyword-only_ |                                                                                                                        | | ||||
| | **RETURNS**    | The scores, produced by [`Scorer.score_spans`](/api/scorer#score_spans). ~~Dict[str, Union[float, Dict[str, float]]]~~ | | ||||
| 
 | ||||
| ## SpanCategorizer.create_optimizer {#create_optimizer tag="method"} | ||||
| 
 | ||||
| Create an optimizer for the pipeline component. | ||||
|  |  | |||
|  | @ -55,10 +55,12 @@ shortcut for this and instantiate the component using its string name and | |||
| [`nlp.add_pipe`](/api/language#add_pipe). | ||||
| 
 | ||||
| | Name           | Description                                                                                                                                                                                                                                           | | ||||
| | ------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ||||
| | -------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ||||
| | `vocab`        | The shared vocabulary. ~~Vocab~~                                                                                                                                                                                                                      | | ||||
| | `model`        | A model instance that predicts the tag probabilities. The output vectors should match the number of tags in size, and be normalized as probabilities (all scores between 0 and 1, with the rows summing to `1`). ~~Model[List[Doc], List[Floats2d]]~~ | | ||||
| | `name`         | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~                                                                                                                                                   | | ||||
| | _keyword-only_ |                                                                                                                                                                                                                                                       | | ||||
| | `scorer`       | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attribute `"tag"`. ~~Optional[Callable]~~                                                                                                           | | ||||
| 
 | ||||
| ## Tagger.\_\_call\_\_ {#call tag="method"} | ||||
| 
 | ||||
|  | @ -249,21 +251,6 @@ predicted scores. | |||
| | `scores`    | Scores representing the model's predictions.                                | | ||||
| | **RETURNS** | The loss and the gradient, i.e. `(loss, gradient)`. ~~Tuple[float, float]~~ | | ||||
| 
 | ||||
| ## Tagger.score {#score tag="method" new="3"} | ||||
| 
 | ||||
| Score a batch of examples. | ||||
| 
 | ||||
| > #### Example | ||||
| > | ||||
| > ```python | ||||
| > scores = tagger.score(examples) | ||||
| > ``` | ||||
| 
 | ||||
| | Name        | Description                                                                                                                       | | ||||
| | ----------- | --------------------------------------------------------------------------------------------------------------------------------- | | ||||
| | `examples`  | The examples to score. ~~Iterable[Example]~~                                                                                      | | ||||
| | **RETURNS** | The scores, produced by [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attribute `"tag"`. ~~Dict[str, float]~~ | | ||||
| 
 | ||||
| ## Tagger.create_optimizer {#create_optimizer tag="method"} | ||||
| 
 | ||||
| Create an optimizer for the pipeline component. | ||||
|  |  | |||
|  | @ -97,12 +97,13 @@ shortcut for this and instantiate the component using its string name and | |||
| [`nlp.add_pipe`](/api/language#add_pipe). | ||||
| 
 | ||||
| | Name           | Description                                                                                                                      | | ||||
| | -------------- | -------------------------------------------------------------------------------------------------------------------------- | | ||||
| | -------------- | -------------------------------------------------------------------------------------------------------------------------------- | | ||||
| | `vocab`        | The shared vocabulary. ~~Vocab~~                                                                                                 | | ||||
| | `model`        | The Thinc [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. ~~Model[List[Doc], List[Floats2d]]~~       | | ||||
| | `name`         | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~                              | | ||||
| | _keyword-only_ |                                                                                                                                  | | ||||
| | `threshold`    | Cutoff to consider a prediction "positive", relevant when printing accuracy results. ~~float~~                                   | | ||||
| | `scorer`       | The scoring method. Defaults to [`Scorer.score_cats`](/api/scorer#score_cats) for the attribute `"cats"`. ~~Optional[Callable]~~ | | ||||
| 
 | ||||
| ## TextCategorizer.\_\_call\_\_ {#call tag="method"} | ||||
| 
 | ||||
|  |  | |||
|  | @ -373,6 +373,7 @@ factories. | |||
| | `optimizers`      | Registry for functions that create [optimizers](https://thinc.ai/docs/api-optimizers).                                                                                                                                                             | | ||||
| | `readers`         | Registry for file and data readers, including training and evaluation data readers like [`Corpus`](/api/corpus).                                                                                                                                   | | ||||
| | `schedules`       | Registry for functions that create [schedules](https://thinc.ai/docs/api-schedules).                                                                                                                                                               | | ||||
| | `scorers`         | Registry for functions that create scoring methods for use with the [`Scorer`](/api/scorer). Scoring methods are called with `Iterable[Example]` and arbitrary `\*\*kwargs` and return scores as `Dict[str, Any]`.                                 | | ||||
| | `tokenizers`      | Registry for tokenizer factories. Registered functions should return a callback that receives the `nlp` object and returns a [`Tokenizer`](/api/tokenizer) or a custom callable.                                                                   | | ||||
| 
 | ||||
| ### spacy-transformers registry {#registry-transformers} | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user