Add AttributeRuler.score (#5963)

* Add AttributeRuler.score
  Add scoring for TAG / POS / MORPH / LEMMA if these are present in the assigned token attributes.
  Add default score weights (that don't really make a lot of sense) so that the scores are in the default config in some form.
* Update docs
2025-09-03 02:44:56 +03:00 · 2020-08-26 15:39:30 +02:00 · 2020-08-26 15:39:30 +02:00 · 90d88729e0
commit 90d88729e0
parent 79d460e3a2
5 changed files with 64 additions and 3 deletions
--- a/spacy/pipeline/attributeruler.py
+++ b/spacy/pipeline/attributeruler.py
@ -4,9 +4,11 @@ from pathlib import Path
 from .pipe import Pipe
 from ..errors import Errors
 from ..gold import validate_examples
 from ..language import Language
 from ..matcher import Matcher
-from ..symbols import IDS
+from ..scorer import Scorer
 from ..symbols import IDS, TAG, POS, MORPH, LEMMA
 from ..tokens import Doc, Span
 from ..tokens._retokenize import normalize_token_attrs, set_token_attrs
 from ..vocab import Vocab
@ -192,6 +194,32 @@ class AttributeRuler(Pipe):
            all_patterns.append(p)
        return all_patterns
    def score(self, examples, **kwargs):
        """Score a batch of examples.
        Only attributes that appear as keys in at least one entry of
        self.attrs are scored; all other attributes are skipped.
        examples (Iterable[Example]): The examples to score.
        **kwargs: Passed through unchanged to Scorer.score_token_attr.
        RETURNS (Dict[str, Any]): The scores, produced by
            Scorer.score_token_attr for the attributes "tag", "pos", "morph"
            and "lemma" if present in any of the target token attributes.
        DOCS: https://spacy.io/api/attributeruler#score
        """
        validate_examples(examples, "AttributeRuler.score")
        # Scorable attribute IDs mapped to the attribute name expected by
        # Scorer.score_token_attr. Iterating this table (rather than the set
        # of assigned attributes) keeps the scoring order fixed.
        scorable = {TAG: "tag", POS: "pos", MORPH: "morph", LEMMA: "lemma"}
        # Collect every attribute key used by any of the stored attrs dicts.
        assigned = set()
        for token_attrs in self.attrs:
            assigned.update(token_attrs)
        results = {}
        for attr_id, attr_name in scorable.items():
            if attr_id in assigned:
                results.update(
                    Scorer.score_token_attr(examples, attr_name, **kwargs)
                )
        return results
    def to_bytes(self, exclude: Iterable[str] = tuple()) -> bytes:
        """Serialize the AttributeRuler to a bytestring.
--- a/spacy/pipeline/tagger.pyx
+++ b/spacy/pipeline/tagger.pyx
@ -307,7 +307,7 @@ class Tagger(Pipe):
        examples (Iterable[Example]): The examples to score.
        RETURNS (Dict[str, Any]): The scores, produced by
-            Scorer.score_token_attr for the attributes "tag", "pos" and "lemma".
+            Scorer.score_token_attr for the attribute "tag".
        DOCS: https://spacy.io/api/tagger#score
        """
--- a/spacy/tests/pipeline/test_attributeruler.py
+++ b/spacy/tests/pipeline/test_attributeruler.py
@ -1,5 +1,6 @@
 import pytest
 import numpy
 from spacy.gold import Example
 from spacy.lang.en import English
 from spacy.pipeline import AttributeRuler
 from spacy import util, registry
@ -94,6 +95,23 @@ def test_attributeruler_init_patterns(nlp, pattern_dicts):
    assert doc[3].morph_ == "Case=Nom|Number=Sing"
 def test_attributeruler_score(nlp, pattern_dicts):
    """Check the lemma and morph accuracies reported by nlp.evaluate when
    the attribute ruler is the only component added to the pipeline."""
    text = "This is a test."
    # set up the ruler from the pattern dicts and check what it assigns
    nlp.add_pipe("attribute_ruler", config={"pattern_dicts": pattern_dicts})
    doc = nlp(text)
    third, fourth = doc[2], doc[3]
    assert third.lemma_ == "the"
    assert third.morph_ == "Case=Nom|Number=Plur"
    assert fourth.lemma_ == "cat"
    assert fourth.morph_ == "Case=Nom|Number=Sing"
    # gold annotation provides lemmas only
    gold = {"lemmas": ["this", "is", "a", "cat", "."]}
    dev_examples = [Example.from_dict(nlp.make_doc(text), gold)]
    scores = nlp.evaluate(dev_examples)
    # "cat" is the only correct lemma
    assert scores["lemma_acc"] == pytest.approx(0.2)
    # the empty morphs are correct
    assert scores["morph_acc"] == pytest.approx(0.6)
 def test_attributeruler_tag_map(nlp, tag_map):
    a = AttributeRuler(nlp.vocab)
    a.load_from_tag_map(tag_map)
--- a/website/docs/api/attributeruler.md
+++ b/website/docs/api/attributeruler.md
@ -138,6 +138,21 @@ Get all patterns that have been added to the attribute ruler in the
 | ----------- | -------------------------------------------------------------------------------------------- |
 | **RETURNS** | The patterns added to the attribute ruler. ~~List[Dict[str, Union[List[dict], dict, int]]]~~ |
 ## AttributeRuler.score {#score tag="method" new="3"}
 Score a batch of examples.
 > #### Example
 >
 > ```python
 > scores = attribute_ruler.score(examples)
 > ```
 | Name        | Description                                                                                                                                                                                                           |
 | ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `examples`  | The examples to score. ~~Iterable[Example]~~                                                                                                                                                                          |
 | **RETURNS** | The scores, produced by [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"tag"`, `"pos"`, `"morph"` and `"lemma"` if present in any of the target token attributes. ~~Dict[str, float]~~ |
 ## AttributeRuler.load_from_tag_map {#load_from_tag_map tag="method"}
 Load attribute ruler patterns from a tag map.
--- a/website/docs/api/tagger.md
+++ b/website/docs/api/tagger.md
@ -252,7 +252,7 @@ Score a batch of examples.
 | Name        | Description                                                                                                                                               |
 | ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `examples`  | The examples to score. ~~Iterable[Example]~~                                                                                                              |
-| **RETURNS** | The scores, produced by [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"pos"`, `"tag"` and `"lemma"`. ~~Dict[str, float]~~ |
+| **RETURNS** | The scores, produced by [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attribute `"tag"`. ~~Dict[str, float]~~ |
 ## Tagger.create_optimizer {#create_optimizer tag="method"}