Add AttributeRuler.score (#5963)

* Add AttributeRuler.score

Add scoring for TAG / POS / MORPH / LEMMA if these are present in the
assigned token attributes.

Add default score weights (that don't really make a lot of sense) so
that the scores are in the default config in some form.

* Update docs
This commit is contained in:
Adriane Boyd 2020-08-26 15:39:30 +02:00 committed by GitHub
parent 79d460e3a2
commit 90d88729e0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 64 additions and 3 deletions

View File

@ -4,9 +4,11 @@ from pathlib import Path
from .pipe import Pipe
from ..errors import Errors
from ..gold import validate_examples
from ..language import Language
from ..matcher import Matcher
from ..symbols import IDS
from ..scorer import Scorer
from ..symbols import IDS, TAG, POS, MORPH, LEMMA
from ..tokens import Doc, Span
from ..tokens._retokenize import normalize_token_attrs, set_token_attrs
from ..vocab import Vocab
@ -192,6 +194,32 @@ class AttributeRuler(Pipe):
all_patterns.append(p)
return all_patterns
def score(self, examples, **kwargs):
    """Score a batch of examples.

    Only attributes that at least one added pattern assigns are scored;
    an attribute ruler without patterns for any of the scorable attributes
    returns an empty dict.

    examples (Iterable[Example]): The examples to score.
    **kwargs: Passed through unchanged to Scorer.score_token_attr.
    RETURNS (Dict[str, Any]): The scores, produced by
        Scorer.score_token_attr for the attributes "tag", "pos", "morph"
        and "lemma" if present in any of the target token attributes.

    DOCS: https://spacy.io/api/attributeruler#score
    """
    validate_examples(examples, "AttributeRuler.score")
    # Collect the IDs of every attribute assigned by any pattern.
    assigned = set()
    for token_attrs in self.attrs:
        assigned.update(token_attrs)
    # Attribute ID -> name expected by Scorer.score_token_attr. Walking this
    # fixed table (instead of the set) keeps the order of the returned keys
    # independent of set iteration order.
    scorable = ((TAG, "tag"), (POS, "pos"), (MORPH, "morph"), (LEMMA, "lemma"))
    results = {}
    for attr_id, attr_name in scorable:
        if attr_id in assigned:
            results.update(Scorer.score_token_attr(examples, attr_name, **kwargs))
    return results
def to_bytes(self, exclude: Iterable[str] = tuple()) -> bytes:
"""Serialize the AttributeRuler to a bytestring.

View File

@ -307,7 +307,7 @@ class Tagger(Pipe):
examples (Iterable[Example]): The examples to score.
RETURNS (Dict[str, Any]): The scores, produced by
Scorer.score_token_attr for the attributes "tag", "pos" and "lemma".
Scorer.score_token_attr for the attributes "tag".
DOCS: https://spacy.io/api/tagger#score
"""

View File

@ -1,5 +1,6 @@
import pytest
import numpy
from spacy.gold import Example
from spacy.lang.en import English
from spacy.pipeline import AttributeRuler
from spacy import util, registry
@ -94,6 +95,23 @@ def test_attributeruler_init_patterns(nlp, pattern_dicts):
assert doc[3].morph_ == "Case=Nom|Number=Sing"
def test_attributeruler_score(nlp, pattern_dicts):
    """Check the lemma and morph accuracy that nlp.evaluate reports for a
    pipeline containing an attribute ruler built from pattern dicts."""
    text = "This is a test."
    # set the component up directly from the pattern dicts
    nlp.add_pipe("attribute_ruler", config={"pattern_dicts": pattern_dicts})
    doc = nlp(text)
    # token index -> (lemma, morph) the patterns are expected to assign
    expected = {
        2: ("the", "Case=Nom|Number=Plur"),
        3: ("cat", "Case=Nom|Number=Sing"),
    }
    for index, (lemma, morph) in expected.items():
        assert doc[index].lemma_ == lemma
        assert doc[index].morph_ == morph

    gold = {"lemmas": ["this", "is", "a", "cat", "."]}
    dev_examples = [Example.from_dict(nlp.make_doc(text), gold)]
    scores = nlp.evaluate(dev_examples)
    # "cat" is the only correct lemma
    assert scores["lemma_acc"] == pytest.approx(0.2)
    # the empty morphs are correct
    assert scores["morph_acc"] == pytest.approx(0.6)
def test_attributeruler_tag_map(nlp, tag_map):
a = AttributeRuler(nlp.vocab)
a.load_from_tag_map(tag_map)

View File

@ -138,6 +138,21 @@ Get all patterns that have been added to the attribute ruler in the
| ----------- | -------------------------------------------------------------------------------------------- |
| **RETURNS** | The patterns added to the attribute ruler. ~~List[Dict[str, Union[List[dict], dict, int]]]~~ |
## AttributeRuler.score {#score tag="method" new="3"}
Score a batch of examples.
> #### Example
>
> ```python
> scores = attribute_ruler.score(examples)
> ```
| Name | Description |
| ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `examples` | The examples to score. ~~Iterable[Example]~~ |
| **RETURNS** | The scores, produced by [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"tag"`, `"pos"`, `"morph"` and `"lemma"` if present in any of the target token attributes. ~~Dict[str, float]~~ |
## AttributeRuler.load_from_tag_map {#load_from_tag_map tag="method"}
Load attribute ruler patterns from a tag map.

View File

@ -252,7 +252,7 @@ Score a batch of examples.
| Name | Description |
| ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `examples` | The examples to score. ~~Iterable[Example]~~ |
| **RETURNS** | The scores, produced by [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"pos"`, `"tag"` and `"lemma"`. ~~Dict[str, float]~~ |
| **RETURNS** | The scores, produced by [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attribute `"tag"`. ~~Dict[str, float]~~ |
## Tagger.create_optimizer {#create_optimizer tag="method"}