mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-24 00:04:15 +03:00
* Document scorers in registry and components from #8766 * Update spacy/pipeline/lemmatizer.py Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com> * Update website/docs/api/dependencyparser.md Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com> * Reformat Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
This commit is contained in:
parent
944ad6b1d4
commit
b278f31ee6
|
@ -36,9 +36,7 @@ def make_attribute_ruler(
|
|||
return AttributeRuler(nlp.vocab, name, validate=validate, scorer=scorer)
|
||||
|
||||
|
||||
def attribute_ruler_score(
|
||||
examples: Iterable[Example], **kwargs
|
||||
) -> Dict[str, Any]:
|
||||
def attribute_ruler_score(examples: Iterable[Example], **kwargs) -> Dict[str, Any]:
|
||||
def morph_key_getter(token, attr):
|
||||
return getattr(token, attr).key
|
||||
|
||||
|
@ -84,6 +82,10 @@ class AttributeRuler(Pipe):
|
|||
|
||||
vocab (Vocab): The vocab.
|
||||
name (str): The pipe name. Defaults to "attribute_ruler".
|
||||
scorer (Optional[Callable]): The scoring method. Defaults to
|
||||
Scorer.score_token_attr for the attributes "tag", "pos", "morph" and
|
||||
"lemma" and Scorer.score_token_attr_per_feat for the attribute
|
||||
"morph".
|
||||
|
||||
RETURNS (AttributeRuler): The AttributeRuler component.
|
||||
|
||||
|
|
|
@ -102,6 +102,7 @@ def make_parser(
|
|||
primarily affects the label accuracy, it can also affect the attachment
|
||||
structure, as the labels are used to represent the pseudo-projectivity
|
||||
transformation.
|
||||
scorer (Optional[Callable]): The scoring method.
|
||||
"""
|
||||
return DependencyParser(
|
||||
nlp.vocab,
|
||||
|
|
|
@ -83,6 +83,7 @@ def make_entity_linker(
|
|||
entity_vector_length (int): Size of encoding vectors in the KB.
|
||||
get_candidates (Callable[[KnowledgeBase, "Span"], Iterable[Candidate]]): Function that
|
||||
produces a list of candidates, given a certain knowledge base and a textual mention.
|
||||
scorer (Optional[Callable]): The scoring method.
|
||||
"""
|
||||
return EntityLinker(
|
||||
nlp.vocab,
|
||||
|
@ -142,6 +143,8 @@ class EntityLinker(TrainablePipe):
|
|||
entity_vector_length (int): Size of encoding vectors in the KB.
|
||||
get_candidates (Callable[[KnowledgeBase, Span], Iterable[Candidate]]): Function that
|
||||
produces a list of candidates, given a certain knowledge base and a textual mention.
|
||||
scorer (Optional[Callable]): The scoring method. Defaults to
|
||||
Scorer.score_links.
|
||||
|
||||
DOCS: https://spacy.io/api/entitylinker#init
|
||||
"""
|
||||
|
|
|
@ -106,6 +106,8 @@ class EntityRuler(Pipe):
|
|||
overwrite_ents (bool): If existing entities are present, e.g. entities
|
||||
added by the model, overwrite them by matches if necessary.
|
||||
ent_id_sep (str): Separator used internally for entity IDs.
|
||||
scorer (Optional[Callable]): The scoring method. Defaults to
|
||||
spacy.scorer.get_ner_prf.
|
||||
|
||||
DOCS: https://spacy.io/api/entityruler#init
|
||||
"""
|
||||
|
|
|
@ -90,6 +90,8 @@ class Lemmatizer(Pipe):
|
|||
mode (str): The lemmatizer mode: "lookup", "rule". Defaults to "lookup".
|
||||
overwrite (bool): Whether to overwrite existing lemmas. Defaults to
|
||||
`False`.
|
||||
scorer (Optional[Callable]): The scoring method. Defaults to
|
||||
Scorer.score_token_attr for the attribute "lemma".
|
||||
|
||||
DOCS: https://spacy.io/api/lemmatizer#init
|
||||
"""
|
||||
|
|
|
@ -95,6 +95,9 @@ class Morphologizer(Tagger):
|
|||
model (thinc.api.Model): The Thinc Model powering the pipeline component.
|
||||
name (str): The component instance name, used to add entries to the
|
||||
losses during training.
|
||||
scorer (Optional[Callable]): The scoring method. Defaults to
|
||||
Scorer.score_token_attr for the attributes "pos" and "morph" and
|
||||
Scorer.score_token_attr_per_feat for the attribute "morph".
|
||||
|
||||
DOCS: https://spacy.io/api/morphologizer#init
|
||||
"""
|
||||
|
|
|
@ -82,6 +82,7 @@ def make_ner(
|
|||
incorrect_spans_key (Optional[str]): Identifies spans that are known
|
||||
to be incorrect entity annotations. The incorrect entity annotations
|
||||
can be stored in the span group, under this key.
|
||||
scorer (Optional[Callable]): The scoring method.
|
||||
"""
|
||||
return EntityRecognizer(
|
||||
nlp.vocab,
|
||||
|
@ -158,6 +159,7 @@ def make_beam_ner(
|
|||
and are faster to compute.
|
||||
incorrect_spans_key (Optional[str]): Optional key into span groups of
|
||||
entities known to be non-entities.
|
||||
scorer (Optional[Callable]): The scoring method.
|
||||
"""
|
||||
return EntityRecognizer(
|
||||
nlp.vocab,
|
||||
|
|
|
@ -55,7 +55,8 @@ class Sentencizer(Pipe):
|
|||
|
||||
punct_chars (list): Punctuation characters to split on. Will be
|
||||
serialized with the nlp object.
|
||||
RETURNS (Sentencizer): The sentencizer component.
|
||||
scorer (Optional[Callable]): The scoring method. Defaults to
|
||||
Scorer.score_spans for the attribute "sents".
|
||||
|
||||
DOCS: https://spacy.io/api/sentencizer#init
|
||||
"""
|
||||
|
|
|
@ -69,6 +69,8 @@ class SentenceRecognizer(Tagger):
|
|||
model (thinc.api.Model): The Thinc Model powering the pipeline component.
|
||||
name (str): The component instance name, used to add entries to the
|
||||
losses during training.
|
||||
scorer (Optional[Callable]): The scoring method. Defaults to
|
||||
Scorer.score_spans for the attribute "sents".
|
||||
|
||||
DOCS: https://spacy.io/api/sentencerecognizer#init
|
||||
"""
|
||||
|
|
|
@ -181,6 +181,22 @@ class SpanCategorizer(TrainablePipe):
|
|||
scorer: Optional[Callable] = spancat_score,
|
||||
) -> None:
|
||||
"""Initialize the span categorizer.
|
||||
vocab (Vocab): The shared vocabulary.
|
||||
model (thinc.api.Model): The Thinc Model powering the pipeline component.
|
||||
name (str): The component instance name, used to add entries to the
|
||||
losses during training.
|
||||
spans_key (str): Key of the Doc.spans dict to save the spans under.
|
||||
During initialization and training, the component will look for
|
||||
spans on the reference document under the same key. Defaults to
|
||||
`"spans"`.
|
||||
threshold (float): Minimum probability to consider a prediction
|
||||
positive. Spans with a positive prediction will be saved on the Doc.
|
||||
Defaults to 0.5.
|
||||
max_positive (Optional[int]): Maximum number of labels to consider
|
||||
positive per span. Defaults to None, indicating no limit.
|
||||
scorer (Optional[Callable]): The scoring method. Defaults to
|
||||
Scorer.score_spans for the Doc.spans[spans_key] with overlapping
|
||||
spans allowed.
|
||||
|
||||
DOCS: https://spacy.io/api/spancategorizer#init
|
||||
"""
|
||||
|
|
|
@ -78,6 +78,8 @@ class Tagger(TrainablePipe):
|
|||
model (thinc.api.Model): The Thinc Model powering the pipeline component.
|
||||
name (str): The component instance name, used to add entries to the
|
||||
losses during training.
|
||||
scorer (Optional[Callable]): The scoring method. Defaults to
|
||||
Scorer.score_token_attr for the attribute "tag".
|
||||
|
||||
DOCS: https://spacy.io/api/tagger#init
|
||||
"""
|
||||
|
|
|
@ -104,6 +104,7 @@ def make_textcat(
|
|||
model (Model[List[Doc], List[Floats2d]]): A model instance that predicts
|
||||
scores for each category.
|
||||
threshold (float): Cutoff to consider a prediction "positive".
|
||||
scorer (Optional[Callable]): The scoring method.
|
||||
"""
|
||||
return TextCategorizer(nlp.vocab, model, name, threshold=threshold, scorer=scorer)
|
||||
|
||||
|
@ -144,6 +145,8 @@ class TextCategorizer(TrainablePipe):
|
|||
name (str): The component instance name, used to add entries to the
|
||||
losses during training.
|
||||
threshold (float): Cutoff to consider a prediction "positive".
|
||||
scorer (Optional[Callable]): The scoring method. Defaults to
|
||||
Scorer.score_cats for the attribute "cats".
|
||||
|
||||
DOCS: https://spacy.io/api/textcategorizer#init
|
||||
"""
|
||||
|
|
|
@ -87,6 +87,7 @@ cdef class Parser(TrainablePipe):
|
|||
incorrect_spans_key (Optional[str]): Identifies spans that are known
|
||||
to be incorrect entity annotations. The incorrect entity annotations
|
||||
can be stored in the span group, under this key.
|
||||
scorer (Optional[Callable]): The scoring method. Defaults to None.
|
||||
"""
|
||||
self.vocab = vocab
|
||||
self.name = name
|
||||
|
|
|
@ -48,12 +48,13 @@ Initialize the attribute ruler.
|
|||
> ruler = nlp.add_pipe("attribute_ruler")
|
||||
> ```
|
||||
|
||||
| Name | Description |
|
||||
| -------------- | ---------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `vocab` | The shared vocabulary to pass to the matcher. ~~Vocab~~ |
|
||||
| `name` | Instance name of the current pipeline component. Typically passed in automatically from the factory when the component is added. ~~str~~ |
|
||||
| _keyword-only_ | |
|
||||
| `validate` | Whether patterns should be validated (passed to the [`Matcher`](/api/matcher#init)). Defaults to `False`. ~~bool~~ |
|
||||
| Name | Description |
|
||||
| -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| `vocab` | The shared vocabulary to pass to the matcher. ~~Vocab~~ |
|
||||
| `name` | Instance name of the current pipeline component. Typically passed in automatically from the factory when the component is added. ~~str~~ |
|
||||
| _keyword-only_ | |
|
||||
| `validate` | Whether patterns should be validated (passed to the [`Matcher`](/api/matcher#init)). Defaults to `False`. ~~bool~~ |
|
||||
| `scorer` | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"tag`", `"pos"`, `"morph"` and `"lemma"` and [`Scorer.score_token_attr_per_feat`](/api/scorer#score_token_attr_per_feat) for the attribute `"morph"`. ~~Optional[Callable]~~ |
|
||||
|
||||
## AttributeRuler.\_\_call\_\_ {#call tag="method"}
|
||||
|
||||
|
@ -175,21 +176,6 @@ Load attribute ruler patterns from morph rules.
|
|||
| ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `morph_rules` | The morph rules that map token text and fine-grained tags to coarse-grained tags, lemmas and morphological features. ~~Dict[str, Dict[str, Dict[Union[int, str], Union[int, str]]]]~~ |
|
||||
|
||||
## AttributeRuler.score {#score tag="method" new="3"}
|
||||
|
||||
Score a batch of examples.
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> scores = ruler.score(examples)
|
||||
> ```
|
||||
|
||||
| Name | Description |
|
||||
| ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `examples` | The examples to score. ~~Iterable[Example]~~ |
|
||||
| **RETURNS** | The scores, produced by [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"tag"`, `"pos"`, `"morph"` and `"lemma"` if present in any of the target token attributes. ~~Dict[str, float]~~ |
|
||||
|
||||
## AttributeRuler.to_disk {#to_disk tag="method"}
|
||||
|
||||
Serialize the pipe to disk.
|
||||
|
|
|
@ -91,6 +91,7 @@ shortcut for this and instantiate the component using its string name and
|
|||
| `update_with_oracle_cut_size` | During training, cut long sequences into shorter segments by creating intermediate states based on the gold-standard history. The model is not very sensitive to this parameter, so you usually won't need to change it. Defaults to `100`. ~~int~~ |
|
||||
| `learn_tokens` | Whether to learn to merge subtokens that are split relative to the gold standard. Experimental. Defaults to `False`. ~~bool~~ |
|
||||
| `min_action_freq` | The minimum frequency of labelled actions to retain. Rarer labelled actions have their label backed-off to "dep". While this primarily affects the label accuracy, it can also affect the attachment structure, as the labels are used to represent the pseudo-projectivity transformation. ~~int~~ |
|
||||
| `scorer` | The scoring method. Defaults to [`Scorer.score_deps`](/api/scorer#score_deps) for the attribute `"dep"` ignoring the labels `p` and `punct` and [`Scorer.score_spans`](/api/scorer/#score_spans) for the attribute `"sents"`. ~~Optional[Callable]~~ |
|
||||
|
||||
## DependencyParser.\_\_call\_\_ {#call tag="method"}
|
||||
|
||||
|
@ -259,21 +260,6 @@ predicted scores.
|
|||
| `scores` | Scores representing the model's predictions. ~~StateClass~~ |
|
||||
| **RETURNS** | The loss and the gradient, i.e. `(loss, gradient)`. ~~Tuple[float, float]~~ |
|
||||
|
||||
## DependencyParser.score {#score tag="method" new="3"}
|
||||
|
||||
Score a batch of examples.
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> scores = parser.score(examples)
|
||||
> ```
|
||||
|
||||
| Name | Description |
|
||||
| ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| `examples` | The examples to score. ~~Iterable[Example]~~ |
|
||||
| **RETURNS** | The scores, produced by [`Scorer.score_spans`](/api/scorer#score_spans) and [`Scorer.score_deps`](/api/scorer#score_deps). ~~Dict[str, Union[float, Dict[str, float]]]~~ |
|
||||
|
||||
## DependencyParser.create_optimizer {#create_optimizer tag="method"}
|
||||
|
||||
Create an [`Optimizer`](https://thinc.ai/docs/api-optimizers) for the pipeline
|
||||
|
|
|
@ -50,6 +50,7 @@ architectures and their arguments and hyperparameters.
|
|||
| `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. Defaults to [EntityLinker](/api/architectures#EntityLinker). ~~Model~~ |
|
||||
| `entity_vector_length` | Size of encoding vectors in the KB. Defaults to `64`. ~~int~~ |
|
||||
| `get_candidates` | Function that generates plausible candidates for a given `Span` object. Defaults to [CandidateGenerator](/api/architectures#CandidateGenerator), a function looking up exact, case-dependent aliases in the KB. ~~Callable[[KnowledgeBase, Span], Iterable[Candidate]]~~ |
|
||||
| `scorer` | The scoring method. Defaults to [`Scorer.score_links`](/api/scorer#score_links). ~~Optional[Callable]~~ |
|
||||
|
||||
```python
|
||||
%%GITHUB_SPACY/spacy/pipeline/entity_linker.py
|
||||
|
@ -259,21 +260,6 @@ pipe's entity linking model and context encoder. Delegates to
|
|||
| `losses` | Optional record of the loss during training. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
|
||||
| **RETURNS** | The updated `losses` dictionary. ~~Dict[str, float]~~ |
|
||||
|
||||
## EntityLinker.score {#score tag="method" new="3"}
|
||||
|
||||
Score a batch of examples.
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> scores = entity_linker.score(examples)
|
||||
> ```
|
||||
|
||||
| Name | Description |
|
||||
| ----------- | ---------------------------------------------------------------------------------------------- |
|
||||
| `examples` | The examples to score. ~~Iterable[Example]~~ |
|
||||
| **RETURNS** | The scores, produced by [`Scorer.score_links`](/api/scorer#score_links) . ~~Dict[str, float]~~ |
|
||||
|
||||
## EntityLinker.create_optimizer {#create_optimizer tag="method"}
|
||||
|
||||
Create an optimizer for the pipeline component.
|
||||
|
|
|
@ -48,6 +48,7 @@ architectures and their arguments and hyperparameters.
|
|||
| `update_with_oracle_cut_size` | During training, cut long sequences into shorter segments by creating intermediate states based on the gold-standard history. The model is not very sensitive to this parameter, so you usually won't need to change it. Defaults to `100`. ~~int~~ |
|
||||
| `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. Defaults to [TransitionBasedParser](/api/architectures#TransitionBasedParser). ~~Model[List[Doc], List[Floats2d]]~~ |
|
||||
| `incorrect_spans_key` | This key refers to a `SpanGroup` in `doc.spans` that specifies incorrect spans. The NER wiill learn not to predict (exactly) those spans. Defaults to `None`. ~~Optional[str]~~ |
|
||||
| `scorer` | The scoring method. Defaults to [`spacy.scorer.get_ner_prf`](/api/scorer#get_ner_prf). ~~Optional[Callable]~~ |
|
||||
|
||||
```python
|
||||
%%GITHUB_SPACY/spacy/pipeline/ner.pyx
|
||||
|
@ -251,21 +252,6 @@ predicted scores.
|
|||
| `scores` | Scores representing the model's predictions. ~~StateClass~~ |
|
||||
| **RETURNS** | The loss and the gradient, i.e. `(loss, gradient)`. ~~Tuple[float, float]~~ |
|
||||
|
||||
## EntityRecognizer.score {#score tag="method" new="3"}
|
||||
|
||||
Score a batch of examples.
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> scores = ner.score(examples)
|
||||
> ```
|
||||
|
||||
| Name | Description |
|
||||
| ----------- | --------------------------------------------------------- |
|
||||
| `examples` | The examples to score. ~~Iterable[Example]~~ |
|
||||
| **RETURNS** | The scores. ~~Dict[str, Union[float, Dict[str, float]]]~~ |
|
||||
|
||||
## EntityRecognizer.create_optimizer {#create_optimizer tag="method"}
|
||||
|
||||
Create an optimizer for the pipeline component.
|
||||
|
|
|
@ -40,6 +40,7 @@ how the component should be configured. You can override its settings via the
|
|||
| `validate` | Whether patterns should be validated (passed to the `Matcher` and `PhraseMatcher`). Defaults to `False`. ~~bool~~ |
|
||||
| `overwrite_ents` | If existing entities are present, e.g. entities added by the model, overwrite them by matches if necessary. Defaults to `False`. ~~bool~~ |
|
||||
| `ent_id_sep` | Separator used internally for entity IDs. Defaults to `"\|\|"`. ~~str~~ |
|
||||
| `scorer` | The scoring method. Defaults to [`spacy.scorer.get_ner_prf`](/api/scorer#get_ner_prf). ~~Optional[Callable]~~ |
|
||||
|
||||
```python
|
||||
%%GITHUB_SPACY/spacy/pipeline/entityruler.py
|
||||
|
|
|
@ -47,11 +47,13 @@ data format used by the lookup and rule-based lemmatizers, see
|
|||
> nlp.add_pipe("lemmatizer", config=config)
|
||||
> ```
|
||||
|
||||
| Setting | Description |
|
||||
| ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `mode` | The lemmatizer mode, e.g. `"lookup"` or `"rule"`. Defaults to `lookup` if no language-specific lemmatizer is available (see the following table). ~~str~~ |
|
||||
| `overwrite` | Whether to overwrite existing lemmas. Defaults to `False`. ~~bool~~ |
|
||||
| `model` | **Not yet implemented:** the model to use. ~~Model~~ |
|
||||
| Setting | Description |
|
||||
| -------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `mode` | The lemmatizer mode, e.g. `"lookup"` or `"rule"`. Defaults to `lookup` if no language-specific lemmatizer is available (see the following table). ~~str~~ |
|
||||
| `overwrite` | Whether to overwrite existing lemmas. Defaults to `False`. ~~bool~~ |
|
||||
| `model` | **Not yet implemented:** the model to use. ~~Model~~ |
|
||||
| _keyword-only_ | |
|
||||
| `scorer` | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attribute `"lemma"`. ~~Optional[Callable]~~ |
|
||||
|
||||
Many languages specify a default lemmatizer mode other than `lookup` if a better
|
||||
lemmatizer is available. The lemmatizer modes `rule` and `pos_lookup` require
|
||||
|
|
|
@ -61,11 +61,13 @@ shortcut for this and instantiate the component using its string name and
|
|||
> morphologizer = Morphologizer(nlp.vocab, model)
|
||||
> ```
|
||||
|
||||
| Name | Description |
|
||||
| ------- | -------------------------------------------------------------------------------------------------------------------- |
|
||||
| `vocab` | The shared vocabulary. ~~Vocab~~ |
|
||||
| `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. ~~Model[List[Doc], List[Floats2d]]~~ |
|
||||
| `name` | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ |
|
||||
| Name | Description |
|
||||
| -------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `vocab` | The shared vocabulary. ~~Vocab~~ |
|
||||
| `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. ~~Model[List[Doc], List[Floats2d]]~~ |
|
||||
| `name` | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ |
|
||||
| _keyword-only_ | |
|
||||
| `scorer` | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"pos"` and `"morph"` and [`Scorer.score_token_attr_per_feat`](/api/scorer#score_token_attr_per_feat) for the attribute `"morph"`. ~~Optional[Callable]~~ |
|
||||
|
||||
## Morphologizer.\_\_call\_\_ {#call tag="method"}
|
||||
|
||||
|
|
|
@ -297,10 +297,12 @@ Score a batch of examples.
|
|||
> scores = pipe.score(examples)
|
||||
> ```
|
||||
|
||||
| Name | Description |
|
||||
| ----------- | ------------------------------------------------------------------------------------------------------- |
|
||||
| `examples` | The examples to score. ~~Iterable[Example]~~ |
|
||||
| **RETURNS** | The scores, e.g. produced by the [`Scorer`](/api/scorer). ~~Dict[str, Union[float, Dict[str, float]]]~~ |
|
||||
| Name | Description |
|
||||
| -------------- | ------------------------------------------------------------------------------------------------------- |
|
||||
| `examples` | The examples to score. ~~Iterable[Example]~~ |
|
||||
| _keyword-only_ |
|
||||
| `\*\*kwargs` | Any additional settings to pass on to the scorer. ~~Any~~ |
|
||||
| **RETURNS** | The scores, e.g. produced by the [`Scorer`](/api/scorer). ~~Dict[str, Union[float, Dict[str, float]]]~~ |
|
||||
|
||||
## TrainablePipe.create_optimizer {#create_optimizer tag="method"}
|
||||
|
||||
|
|
|
@ -27,9 +27,13 @@ Create a new `Scorer`.
|
|||
> scorer = Scorer(nlp)
|
||||
> ```
|
||||
|
||||
| Name | Description |
|
||||
| ----- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `nlp` | The pipeline to use for scoring, where each pipeline component may provide a scoring method. If none is provided, then a default pipeline for the multi-language code `xx` is constructed containing: `senter`, `tagger`, `morphologizer`, `parser`, `ner`, `textcat`. ~~Language~~ |
|
||||
| Name | Description |
|
||||
| ------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `nlp` | The pipeline to use for scoring, where each pipeline component may provide a scoring method. If none is provided, then a default pipeline is constructed using the `default_lang` and `default_pipeline` settings. ~~Optional[Language]~~ |
|
||||
| `default_lang` | The language to use for a default pipeline if `nlp` is not provided. Defaults to `xx`. ~~str~~ |
|
||||
| `default_pipeline` | The pipeline components to use for a default pipeline if `nlp` is not provided. Defaults to `("senter", "tagger", "morphologizer", "parser", "ner", "textcat")`. ~~Iterable[string]~~ |
|
||||
| _keyword-only_ | |
|
||||
| `\*\*kwargs` | Any additional settings to pass on to the individual scoring methods. ~~Any~~ |
|
||||
|
||||
## Scorer.score {#score tag="method"}
|
||||
|
||||
|
@ -80,7 +84,7 @@ Docs with `has_unknown_spaces` are skipped during scoring.
|
|||
> ```
|
||||
|
||||
| Name | Description |
|
||||
| ----------- | ------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------ |
|
||||
| ----------- | ------------------------------------------------------------------------------------------------------------------- |
|
||||
| `examples` | The `Example` objects holding both the predictions and the correct gold-standard annotations. ~~Iterable[Example]~~ |
|
||||
| **RETURNS** | `Dict` | A dictionary containing the scores `token_acc`, `token_p`, `token_r`, `token_f`. ~~Dict[str, float]]~~ |
|
||||
|
||||
|
@ -253,3 +257,11 @@ entities that overlap between the gold reference and the predictions.
|
|||
| _keyword-only_ | |
|
||||
| `negative_labels` | The string values that refer to no annotation (e.g. "NIL"). ~~Iterable[str]~~ |
|
||||
| **RETURNS** | A dictionary containing the scores. ~~Dict[str, Optional[float]]~~ |
|
||||
|
||||
## get_ner_prf {#get_ner_prf new="3"}
|
||||
|
||||
Compute micro-PRF and per-entity PRF scores.
|
||||
|
||||
| Name | Description |
|
||||
| ---------- | ------------------------------------------------------------------------------------------------------------------- |
|
||||
| `examples` | The `Example` objects holding both the predictions and the correct gold-standard annotations. ~~Iterable[Example]~~ |
|
||||
|
|
|
@ -60,11 +60,13 @@ Create a new pipeline instance. In your application, you would normally use a
|
|||
shortcut for this and instantiate the component using its string name and
|
||||
[`nlp.add_pipe`](/api/language#add_pipe).
|
||||
|
||||
| Name | Description |
|
||||
| ------- | -------------------------------------------------------------------------------------------------------------------- |
|
||||
| `vocab` | The shared vocabulary. ~~Vocab~~ |
|
||||
| `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. ~~Model[List[Doc], List[Floats2d]]~~ |
|
||||
| `name` | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ |
|
||||
| Name | Description |
|
||||
| -------------- | ----------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `vocab` | The shared vocabulary. ~~Vocab~~ |
|
||||
| `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. ~~Model[List[Doc], List[Floats2d]]~~ |
|
||||
| `name` | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ |
|
||||
| _keyword-only_ | |
|
||||
| `scorer` | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for the attribute `"sents"`. ~~Optional[Callable]~~ |
|
||||
|
||||
## SentenceRecognizer.\_\_call\_\_ {#call tag="method"}
|
||||
|
||||
|
@ -238,21 +240,6 @@ predicted scores.
|
|||
| `scores` | Scores representing the model's predictions. |
|
||||
| **RETURNS** | The loss and the gradient, i.e. `(loss, gradient)`. ~~Tuple[float, float]~~ |
|
||||
|
||||
## SentenceRecognizer.score {#score tag="method" new="3"}
|
||||
|
||||
Score a batch of examples.
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> scores = senter.score(examples)
|
||||
> ```
|
||||
|
||||
| Name | Description |
|
||||
| ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `examples` | The examples to score. ~~Iterable[Example]~~ |
|
||||
| **RETURNS** | The scores, produced by [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"pos"`, `"tag"` and `"lemma"`. ~~Dict[str, float]~~ |
|
||||
|
||||
## SentenceRecognizer.create_optimizer {#create_optimizer tag="method"}
|
||||
|
||||
Create an optimizer for the pipeline component.
|
||||
|
|
|
@ -28,7 +28,7 @@ how the component should be configured. You can override its settings via the
|
|||
> ```
|
||||
|
||||
| Setting | Description |
|
||||
| ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------ | ------ |
|
||||
| ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| `punct_chars` | Optional custom list of punctuation characters that mark sentence ends. See below for defaults if not set. Defaults to `None`. ~~Optional[List[str]]~~ | `None` |
|
||||
|
||||
```python
|
||||
|
@ -50,10 +50,11 @@ Initialize the sentencizer.
|
|||
> sentencizer = Sentencizer()
|
||||
> ```
|
||||
|
||||
| Name | Description |
|
||||
| -------------- | ----------------------------------------------------------------------------------------------------------------------- |
|
||||
| _keyword-only_ | |
|
||||
| `punct_chars` | Optional custom list of punctuation characters that mark sentence ends. See below for defaults. ~~Optional[List[str]]~~ |
|
||||
| Name | Description |
|
||||
| -------------- | ---------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| _keyword-only_ | |
|
||||
| `punct_chars` | Optional custom list of punctuation characters that mark sentence ends. See below for defaults. ~~Optional[List[str]]~~ |
|
||||
| `scorer` | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for the attribute `"sents"` ~~Optional[Callable]~~ |
|
||||
|
||||
```python
|
||||
### punct_chars defaults
|
||||
|
@ -112,21 +113,6 @@ applied to the `Doc` in order.
|
|||
| `batch_size` | The number of documents to buffer. Defaults to `128`. ~~int~~ |
|
||||
| **YIELDS** | The processed documents in order. ~~Doc~~ |
|
||||
|
||||
## Sentencizer.score {#score tag="method" new="3"}
|
||||
|
||||
Score a batch of examples.
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> scores = sentencizer.score(examples)
|
||||
> ```
|
||||
|
||||
| Name | Description |
|
||||
| ----------- | --------------------------------------------------------------------------------------------------------------------- |
|
||||
| `examples` | The examples to score. ~~Iterable[Example]~~ |
|
||||
| **RETURNS** | The scores, produced by [`Scorer.score_spans`](/api/scorer#score_spans). ~~Dict[str, Union[float, Dict[str, float]]~~ |
|
||||
|
||||
## Sentencizer.to_disk {#to_disk tag="method"}
|
||||
|
||||
Save the sentencizer settings (punctuation characters) to a directory. Will
|
||||
|
|
|
@ -43,6 +43,7 @@ architectures and their arguments and hyperparameters.
|
|||
| `spans_key` | Key of the [`Doc.spans`](/api/doc#spans) dict to save the spans under. During initialization and training, the component will look for spans on the reference document under the same key. Defaults to `"spans"`. ~~str~~ |
|
||||
| `threshold` | Minimum probability to consider a prediction positive. Spans with a positive prediction will be saved on the Doc. Defaults to `0.5`. ~~float~~ |
|
||||
| `max_positive` | Maximum number of labels to consider positive per span. Defaults to `None`, indicating no limit. ~~Optional[int]~~ |
|
||||
| `scorer` | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for `Doc.spans[spans_key]` with overlapping spans allowed. ~~Optional[Callable]~~ |
|
||||
|
||||
```python
|
||||
%%GITHUB_SPACY/spacy/pipeline/spancat.py
|
||||
|
@ -241,22 +242,6 @@ predicted scores.
|
|||
| `scores` | Scores representing the model's predictions. |
|
||||
| **RETURNS** | The loss and the gradient, i.e. `(loss, gradient)`. ~~Tuple[float, float]~~ |
|
||||
|
||||
## SpanCategorizer.score {#score tag="method"}
|
||||
|
||||
Score a batch of examples.
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> scores = spancat.score(examples)
|
||||
> ```
|
||||
|
||||
| Name | Description |
|
||||
| -------------- | ---------------------------------------------------------------------------------------------------------------------- |
|
||||
| `examples` | The examples to score. ~~Iterable[Example]~~ |
|
||||
| _keyword-only_ | |
|
||||
| **RETURNS** | The scores, produced by [`Scorer.score_spans`](/api/scorer#score_spans). ~~Dict[str, Union[float, Dict[str, float]]]~~ |
|
||||
|
||||
## SpanCategorizer.create_optimizer {#create_optimizer tag="method"}
|
||||
|
||||
Create an optimizer for the pipeline component.
|
||||
|
|
|
@ -54,11 +54,13 @@ Create a new pipeline instance. In your application, you would normally use a
|
|||
shortcut for this and instantiate the component using its string name and
|
||||
[`nlp.add_pipe`](/api/language#add_pipe).
|
||||
|
||||
| Name | Description |
|
||||
| ------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `vocab` | The shared vocabulary. ~~Vocab~~ |
|
||||
| `model` | A model instance that predicts the tag probabilities. The output vectors should match the number of tags in size, and be normalized as probabilities (all scores between 0 and 1, with the rows summing to `1`). ~~Model[List[Doc], List[Floats2d]]~~ |
|
||||
| `name` | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ |
|
||||
| Name | Description |
|
||||
| -------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `vocab` | The shared vocabulary. ~~Vocab~~ |
|
||||
| `model` | A model instance that predicts the tag probabilities. The output vectors should match the number of tags in size, and be normalized as probabilities (all scores between 0 and 1, with the rows summing to `1`). ~~Model[List[Doc], List[Floats2d]]~~ |
|
||||
| `name` | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ |
|
||||
| _keyword-only_ | |
|
||||
| `scorer` | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attribute `"tag"`. ~~Optional[Callable]~~ |
|
||||
|
||||
## Tagger.\_\_call\_\_ {#call tag="method"}
|
||||
|
||||
|
@ -249,21 +251,6 @@ predicted scores.
|
|||
| `scores` | Scores representing the model's predictions. |
|
||||
| **RETURNS** | The loss and the gradient, i.e. `(loss, gradient)`. ~~Tuple[float, float]~~ |
|
||||
|
||||
## Tagger.score {#score tag="method" new="3"}
|
||||
|
||||
Score a batch of examples.
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> scores = tagger.score(examples)
|
||||
> ```
|
||||
|
||||
| Name | Description |
|
||||
| ----------- | --------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `examples` | The examples to score. ~~Iterable[Example]~~ |
|
||||
| **RETURNS** | The scores, produced by [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attribute `"tag"`. ~~Dict[str, float]~~ |
|
||||
|
||||
## Tagger.create_optimizer {#create_optimizer tag="method"}
|
||||
|
||||
Create an optimizer for the pipeline component.
|
||||
|
|
|
@ -96,13 +96,14 @@ Create a new pipeline instance. In your application, you would normally use a
|
|||
shortcut for this and instantiate the component using its string name and
|
||||
[`nlp.add_pipe`](/api/language#create_pipe).
|
||||
|
||||
| Name | Description |
|
||||
| -------------- | -------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `vocab` | The shared vocabulary. ~~Vocab~~ |
|
||||
| `model` | The Thinc [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. ~~Model[List[Doc], List[Floats2d]]~~ |
|
||||
| `name` | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ |
|
||||
| _keyword-only_ | |
|
||||
| `threshold` | Cutoff to consider a prediction "positive", relevant when printing accuracy results. ~~float~~ |
|
||||
| Name | Description |
|
||||
| -------------- | -------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `vocab` | The shared vocabulary. ~~Vocab~~ |
|
||||
| `model` | The Thinc [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. ~~Model[List[Doc], List[Floats2d]]~~ |
|
||||
| `name` | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ |
|
||||
| _keyword-only_ | |
|
||||
| `threshold` | Cutoff to consider a prediction "positive", relevant when printing accuracy results. ~~float~~ |
|
||||
| `scorer` | The scoring method. Defaults to [`Scorer.score_cats`](/api/scorer#score_cats) for the attribute `"cats"`. ~~Optional[Callable]~~ |
|
||||
|
||||
## TextCategorizer.\_\_call\_\_ {#call tag="method"}
|
||||
|
||||
|
|
|
@ -373,6 +373,7 @@ factories.
|
|||
| `optimizers` | Registry for functions that create [optimizers](https://thinc.ai/docs/api-optimizers). |
|
||||
| `readers` | Registry for file and data readers, including training and evaluation data readers like [`Corpus`](/api/corpus). |
|
||||
| `schedules` | Registry for functions that create [schedules](https://thinc.ai/docs/api-schedules). |
|
||||
| `scorers` | Registry for functions that create scoring methods for user with the [`Scorer`](/api/scorer). Scoring methods are called with `Iterable[Example]` and arbitrary `\*\*kwargs` and return scores as `Dict[str, Any]`. |
|
||||
| `tokenizers` | Registry for tokenizer factories. Registered functions should return a callback that receives the `nlp` object and returns a [`Tokenizer`](/api/tokenizer) or a custom callable. |
|
||||
|
||||
### spacy-transformers registry {#registry-transformers}
|
||||
|
|
Loading…
Reference in New Issue
Block a user