Add AttributeRuler.score (#5963)

* Add AttributeRuler.score

Add scoring for TAG / POS / MORPH / LEMMA if these are present in the
assigned token attributes.

Add default score weights (that don't really make a lot of sense) so
that the scores are in the default config in some form.

* Update docs
This commit is contained in:
Adriane Boyd 2020-08-26 15:39:30 +02:00 committed by GitHub
parent 79d460e3a2
commit 90d88729e0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 64 additions and 3 deletions

View File

@ -4,9 +4,11 @@ from pathlib import Path
from .pipe import Pipe from .pipe import Pipe
from ..errors import Errors from ..errors import Errors
from ..gold import validate_examples
from ..language import Language from ..language import Language
from ..matcher import Matcher from ..matcher import Matcher
from ..symbols import IDS from ..scorer import Scorer
from ..symbols import IDS, TAG, POS, MORPH, LEMMA
from ..tokens import Doc, Span from ..tokens import Doc, Span
from ..tokens._retokenize import normalize_token_attrs, set_token_attrs from ..tokens._retokenize import normalize_token_attrs, set_token_attrs
from ..vocab import Vocab from ..vocab import Vocab
@ -192,6 +194,32 @@ class AttributeRuler(Pipe):
all_patterns.append(p) all_patterns.append(p)
return all_patterns return all_patterns
def score(self, examples, **kwargs):
    """Score a batch of examples.

    Only attributes that appear in at least one of the ruler's target
    token attribute dicts (self.attrs) are scored; all other attributes
    are left out of the result.

    examples (Iterable[Example]): The examples to score.
    **kwargs: Passed through unchanged to Scorer.score_token_attr.
    RETURNS (Dict[str, Any]): The scores, produced by
        Scorer.score_token_attr for the attributes "tag", "pos", "morph"
        and "lemma" if present in any of the target token attributes.

    DOCS: https://spacy.io/api/attributeruler#score
    """
    validate_examples(examples, "AttributeRuler.score")
    # Attribute symbol -> attribute name understood by the scorer.
    scorable = {TAG: "tag", POS: "pos", MORPH: "morph", LEMMA: "lemma"}
    # Collect every attribute that any of the added patterns assigns.
    assigned = set()
    for token_attrs in self.attrs:
        assigned.update(token_attrs)
    results = {}
    # Walk the table rather than the set so the result order is stable.
    for attr_id, attr_name in scorable.items():
        if attr_id in assigned:
            results.update(Scorer.score_token_attr(examples, attr_name, **kwargs))
    return results
def to_bytes(self, exclude: Iterable[str] = tuple()) -> bytes: def to_bytes(self, exclude: Iterable[str] = tuple()) -> bytes:
"""Serialize the AttributeRuler to a bytestring. """Serialize the AttributeRuler to a bytestring.

View File

@ -307,7 +307,7 @@ class Tagger(Pipe):
examples (Iterable[Example]): The examples to score. examples (Iterable[Example]): The examples to score.
RETURNS (Dict[str, Any]): The scores, produced by RETURNS (Dict[str, Any]): The scores, produced by
Scorer.score_token_attr for the attributes "tag", "pos" and "lemma". Scorer.score_token_attr for the attributes "tag".
DOCS: https://spacy.io/api/tagger#score DOCS: https://spacy.io/api/tagger#score
""" """

View File

@ -1,5 +1,6 @@
import pytest import pytest
import numpy import numpy
from spacy.gold import Example
from spacy.lang.en import English from spacy.lang.en import English
from spacy.pipeline import AttributeRuler from spacy.pipeline import AttributeRuler
from spacy import util, registry from spacy import util, registry
@ -94,6 +95,23 @@ def test_attributeruler_init_patterns(nlp, pattern_dicts):
assert doc[3].morph_ == "Case=Nom|Number=Sing" assert doc[3].morph_ == "Case=Nom|Number=Sing"
def test_attributeruler_score(nlp, pattern_dicts):
    """Check that nlp.evaluate reports lemma and morph accuracy for the ruler."""
    text = "This is a test."
    # initialize the component with patterns via its config
    nlp.add_pipe("attribute_ruler", config={"pattern_dicts": pattern_dicts})
    doc = nlp(text)
    # token index -> (lemma, morph) expected after the ruler has run
    assigned = {
        2: ("the", "Case=Nom|Number=Plur"),
        3: ("cat", "Case=Nom|Number=Sing"),
    }
    for index, (lemma, morph) in assigned.items():
        assert doc[index].lemma_ == lemma
        assert doc[index].morph_ == morph

    gold = {"lemmas": ["this", "is", "a", "cat", "."]}
    dev_examples = [Example.from_dict(nlp.make_doc(text), gold)]
    scores = nlp.evaluate(dev_examples)
    # "cat" is the only correct lemma
    assert scores["lemma_acc"] == pytest.approx(0.2)
    # the empty morphs are correct
    assert scores["morph_acc"] == pytest.approx(0.6)
def test_attributeruler_tag_map(nlp, tag_map): def test_attributeruler_tag_map(nlp, tag_map):
a = AttributeRuler(nlp.vocab) a = AttributeRuler(nlp.vocab)
a.load_from_tag_map(tag_map) a.load_from_tag_map(tag_map)

View File

@ -138,6 +138,21 @@ Get all patterns that have been added to the attribute ruler in the
| ----------- | -------------------------------------------------------------------------------------------- | | ----------- | -------------------------------------------------------------------------------------------- |
| **RETURNS** | The patterns added to the attribute ruler. ~~List[Dict[str, Union[List[dict], dict, int]]]~~ | | **RETURNS** | The patterns added to the attribute ruler. ~~List[Dict[str, Union[List[dict], dict, int]]]~~ |
## AttributeRuler.score {#score tag="method" new="3"}
Score a batch of examples.
> #### Example
>
> ```python
> scores = attribute_ruler.score(examples)
> ```
| Name | Description |
| ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `examples` | The examples to score. ~~Iterable[Example]~~ |
| **RETURNS** | The scores, produced by [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"tag"`, `"pos"`, `"morph"` and `"lemma"` if present in any of the target token attributes. ~~Dict[str, float]~~ |
## AttributeRuler.load_from_tag_map {#load_from_tag_map tag="method"} ## AttributeRuler.load_from_tag_map {#load_from_tag_map tag="method"}
Load attribute ruler patterns from a tag map. Load attribute ruler patterns from a tag map.

View File

@ -252,7 +252,7 @@ Score a batch of examples.
| Name | Description | | Name | Description |
| ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | | ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `examples` | The examples to score. ~~Iterable[Example]~~ | | `examples` | The examples to score. ~~Iterable[Example]~~ |
| **RETURNS** | The scores, produced by [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"pos"`, `"tag"` and `"lemma"`. ~~Dict[str, float]~~ | | **RETURNS** | The scores, produced by [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attribute `"tag"`. ~~Dict[str, float]~~ |
## Tagger.create_optimizer {#create_optimizer tag="method"} ## Tagger.create_optimizer {#create_optimizer tag="method"}