mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-24 17:06:29 +03:00
Add AttributeRuler.score (#5963)
* Add AttributeRuler.score: add scoring for TAG / POS / MORPH / LEMMA if these are present in the assigned token attributes. Add default score weights (that don't really make a lot of sense) so that the scores are in the default config in some form.
* Update docs
This commit is contained in:
parent
79d460e3a2
commit
90d88729e0
|
@ -4,9 +4,11 @@ from pathlib import Path
|
|||
|
||||
from .pipe import Pipe
|
||||
from ..errors import Errors
|
||||
from ..gold import validate_examples
|
||||
from ..language import Language
|
||||
from ..matcher import Matcher
|
||||
from ..symbols import IDS
|
||||
from ..scorer import Scorer
|
||||
from ..symbols import IDS, TAG, POS, MORPH, LEMMA
|
||||
from ..tokens import Doc, Span
|
||||
from ..tokens._retokenize import normalize_token_attrs, set_token_attrs
|
||||
from ..vocab import Vocab
|
||||
|
@ -192,6 +194,32 @@ class AttributeRuler(Pipe):
|
|||
all_patterns.append(p)
|
||||
return all_patterns
|
||||
|
||||
def score(self, examples, **kwargs):
    """Score a batch of examples.

    examples (Iterable[Example]): The examples to score.
    RETURNS (Dict[str, Any]): The scores, produced by
        Scorer.score_token_attr for the attributes "tag", "pos", "morph"
        and "lemma" for the target token attributes.

    DOCS: https://spacy.io/api/attributeruler#score
    """
    validate_examples(examples, "AttributeRuler.score")
    # Attribute IDs that get scored, mapped to the attribute name passed to
    # Scorer.score_token_attr. Any other assigned attribute is not scored.
    scored_attrs = {TAG: "tag", POS: "pos", MORPH: "morph", LEMMA: "lemma"}
    # Collect every attribute ID that occurs in any of the assigned token
    # attribute dicts, so each attribute is scored at most once.
    attrs = set()
    for token_attrs in self.attrs:
        attrs.update(token_attrs)
    results = {}
    for attr in attrs:
        attr_name = scored_attrs.get(attr)
        if attr_name is not None:
            results.update(Scorer.score_token_attr(examples, attr_name, **kwargs))
    return results
|
||||
|
||||
def to_bytes(self, exclude: Iterable[str] = tuple()) -> bytes:
|
||||
"""Serialize the AttributeRuler to a bytestring.
|
||||
|
||||
|
|
|
@ -307,7 +307,7 @@ class Tagger(Pipe):
|
|||
|
||||
examples (Iterable[Example]): The examples to score.
|
||||
RETURNS (Dict[str, Any]): The scores, produced by
|
||||
Scorer.score_token_attr for the attributes "tag", "pos" and "lemma".
|
||||
Scorer.score_token_attr for the attributes "tag".
|
||||
|
||||
DOCS: https://spacy.io/api/tagger#score
|
||||
"""
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
import pytest
|
||||
import numpy
|
||||
from spacy.gold import Example
|
||||
from spacy.lang.en import English
|
||||
from spacy.pipeline import AttributeRuler
|
||||
from spacy import util, registry
|
||||
|
@ -94,6 +95,23 @@ def test_attributeruler_init_patterns(nlp, pattern_dicts):
|
|||
assert doc[3].morph_ == "Case=Nom|Number=Sing"
|
||||
|
||||
|
||||
def test_attributeruler_score(nlp, pattern_dicts):
    """Evaluate a pipeline whose attribute ruler is initialized from
    pattern dicts and check the reported lemma and morph accuracies."""
    # the ruler receives its patterns through the component config
    ruler_config = {"pattern_dicts": pattern_dicts}
    nlp.add_pipe("attribute_ruler", config=ruler_config)
    text = "This is a test."
    parsed = nlp(text)
    # tokens 2 and 3 are expected to carry the lemma / morph values below
    assert parsed[2].lemma_ == "the"
    assert parsed[2].morph_ == "Case=Nom|Number=Plur"
    assert parsed[3].lemma_ == "cat"
    assert parsed[3].morph_ == "Case=Nom|Number=Sing"

    # gold annotation only provides lemmas for the same text
    gold = {"lemmas": ["this", "is", "a", "cat", "."]}
    dev_examples = [Example.from_dict(nlp.make_doc(text), gold)]
    scores = nlp.evaluate(dev_examples)
    # "cat" is the only correct lemma
    assert scores["lemma_acc"] == pytest.approx(0.2)
    # the empty morphs are correct
    assert scores["morph_acc"] == pytest.approx(0.6)
|
||||
|
||||
|
||||
def test_attributeruler_tag_map(nlp, tag_map):
|
||||
a = AttributeRuler(nlp.vocab)
|
||||
a.load_from_tag_map(tag_map)
|
||||
|
|
|
@ -138,6 +138,21 @@ Get all patterns that have been added to the attribute ruler in the
|
|||
| ----------- | -------------------------------------------------------------------------------------------- |
|
||||
| **RETURNS** | The patterns added to the attribute ruler. ~~List[Dict[str, Union[List[dict], dict, int]]]~~ |
|
||||
|
||||
## AttributeRuler.score {#score tag="method" new="3"}
|
||||
|
||||
Score a batch of examples.
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> scores = attribute_ruler.score(examples)
|
||||
> ```
|
||||
|
||||
| Name | Description |
|
||||
| ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `examples` | The examples to score. ~~Iterable[Example]~~ |
|
||||
| **RETURNS** | The scores, produced by [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"tag"`, `"pos"`, `"morph"` and `"lemma"` if present in any of the target token attributes. ~~Dict[str, float]~~ |
|
||||
|
||||
## AttributeRuler.load_from_tag_map {#load_from_tag_map tag="method"}
|
||||
|
||||
Load attribute ruler patterns from a tag map.
|
||||
|
|
|
@ -252,7 +252,7 @@ Score a batch of examples.
|
|||
| Name | Description |
|
||||
| ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `examples` | The examples to score. ~~Iterable[Example]~~ |
|
||||
| **RETURNS** | The scores, produced by [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"pos"`, `"tag"` and `"lemma"`. ~~Dict[str, float]~~ |
|
||||
| **RETURNS** | The scores, produced by [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attribute `"tag"`. ~~Dict[str, float]~~ |
|
||||
|
||||
## Tagger.create_optimizer {#create_optimizer tag="method"}
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user