Add AttributeRuler.score (#5963)

* Add AttributeRuler.score

  Add scoring for TAG / POS / MORPH / LEMMA if these are present in the assigned token attributes.

  Add default score weights (that don't really make a lot of sense) so that the scores are in the default config in some form.

* Update docs
2025-11-09 20:38:06 +03:00 · 2020-08-26 15:39:30 +02:00 · 2020-08-26 15:39:30 +02:00 · 90d88729e0
commit 90d88729e0
parent 79d460e3a2
5 changed files with 64 additions and 3 deletions
--- a/spacy/pipeline/attributeruler.py
+++ b/spacy/pipeline/attributeruler.py
@ -4,9 +4,11 @@ from pathlib import Path

 from .pipe import Pipe
 from ..errors import Errors
+from ..gold import validate_examples
 from ..language import Language
 from ..matcher import Matcher
-from ..symbols import IDS
+from ..scorer import Scorer
+from ..symbols import IDS, TAG, POS, MORPH, LEMMA
 from ..tokens import Doc, Span
 from ..tokens._retokenize import normalize_token_attrs, set_token_attrs
 from ..vocab import Vocab
@ -192,6 +194,32 @@ class AttributeRuler(Pipe):
            all_patterns.append(p)
        return all_patterns

+    def score(self, examples, **kwargs):
+        """Score a batch of examples.
+
+        examples (Iterable[Example]): The examples to score.
+        RETURNS (Dict[str, Any]): The scores, produced by
+            Scorer.score_token_attr for the attributes "tag", "pos", "morph"
+            and "lemma" if present in any of the target token attributes.
+
+        DOCS: https://spacy.io/api/attributeruler#score
+        """
+        validate_examples(examples, "AttributeRuler.score")
+        results = {}
+        attrs = set()
+        for token_attrs in self.attrs:
+            attrs.update(token_attrs)
+        for attr in attrs:
+            if attr == TAG:
+                results.update(Scorer.score_token_attr(examples, "tag", **kwargs))
+            elif attr == POS:
+                results.update(Scorer.score_token_attr(examples, "pos", **kwargs))
+            elif attr == MORPH:
+                results.update(Scorer.score_token_attr(examples, "morph", **kwargs))
+            elif attr == LEMMA:
+                results.update(Scorer.score_token_attr(examples, "lemma", **kwargs))
+        return results
+
    def to_bytes(self, exclude: Iterable[str] = tuple()) -> bytes:
        """Serialize the AttributeRuler to a bytestring.

--- a/spacy/pipeline/tagger.pyx
+++ b/spacy/pipeline/tagger.pyx
@ -307,7 +307,7 @@ class Tagger(Pipe):

        examples (Iterable[Example]): The examples to score.
        RETURNS (Dict[str, Any]): The scores, produced by
-            Scorer.score_token_attr for the attributes "tag", "pos" and "lemma".
+            Scorer.score_token_attr for the attribute "tag".

        DOCS: https://spacy.io/api/tagger#score
        """
--- a/spacy/tests/pipeline/test_attributeruler.py
+++ b/spacy/tests/pipeline/test_attributeruler.py
@ -1,5 +1,6 @@
 import pytest
 import numpy
+from spacy.gold import Example
 from spacy.lang.en import English
 from spacy.pipeline import AttributeRuler
 from spacy import util, registry
@ -94,6 +95,23 @@ def test_attributeruler_init_patterns(nlp, pattern_dicts):
    assert doc[3].morph_ == "Case=Nom|Number=Sing"


+def test_attributeruler_score(nlp, pattern_dicts):
+    # initialize with patterns
+    nlp.add_pipe("attribute_ruler", config={"pattern_dicts": pattern_dicts})
+    doc = nlp("This is a test.")
+    assert doc[2].lemma_ == "the"
+    assert doc[2].morph_ == "Case=Nom|Number=Plur"
+    assert doc[3].lemma_ == "cat"
+    assert doc[3].morph_ == "Case=Nom|Number=Sing"
+
+    dev_examples = [Example.from_dict(nlp.make_doc("This is a test."), {"lemmas": ["this", "is", "a", "cat", "."]})]
+    scores = nlp.evaluate(dev_examples)
+    # "cat" is the only correct lemma
+    assert scores["lemma_acc"] == pytest.approx(0.2)
+    # the empty morphs are correct
+    assert scores["morph_acc"] == pytest.approx(0.6)
+
+
 def test_attributeruler_tag_map(nlp, tag_map):
    a = AttributeRuler(nlp.vocab)
    a.load_from_tag_map(tag_map)
--- a/website/docs/api/attributeruler.md
+++ b/website/docs/api/attributeruler.md
@ -138,6 +138,21 @@ Get all patterns that have been added to the attribute ruler in the
 | ----------- | -------------------------------------------------------------------------------------------- |
 | **RETURNS** | The patterns added to the attribute ruler. ~~List[Dict[str, Union[List[dict], dict, int]]]~~ |

+## AttributeRuler.score {#score tag="method" new="3"}
+
+Score a batch of examples.
+
+> #### Example
+>
+> ```python
+> scores = attribute_ruler.score(examples)
+> ```
+
+| Name        | Description                                                                                                                                                                                                           |
+| ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `examples`  | The examples to score. ~~Iterable[Example]~~                                                                                                                                                                          |
+| **RETURNS** | The scores, produced by [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"tag"`, `"pos"`, `"morph"` and `"lemma"` if present in any of the target token attributes. ~~Dict[str, float]~~ |
+
 ## AttributeRuler.load_from_tag_map {#load_from_tag_map tag="method"}

 Load attribute ruler patterns from a tag map.
--- a/website/docs/api/tagger.md
+++ b/website/docs/api/tagger.md
@ -252,7 +252,7 @@ Score a batch of examples.
 | Name        | Description                                                                                                                                               |
 | ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `examples`  | The examples to score. ~~Iterable[Example]~~                                                                                                              |
-| **RETURNS** | The scores, produced by [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"pos"`, `"tag"` and `"lemma"`. ~~Dict[str, float]~~ |
+| **RETURNS** | The scores, produced by [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attribute `"tag"`. ~~Dict[str, float]~~ |

 ## Tagger.create_optimizer {#create_optimizer tag="method"}