Sort the AttributeRuler matches by rule order

Sort the returned matches by rule order (the `match_id`) so that the rules are applied in the order they were added. This is necessary, for instance, if the `AttributeRuler` is used for the tag map and later rules require POS tags.
2025-07-31 02:19:46 +03:00 · 2020-08-28 20:45:19 +02:00 · 2020-08-28 20:45:19 +02:00 · 0104bd1600
commit 0104bd1600
parent 8674b17651
2 changed files with 23 additions and 1 deletion
--- a/spacy/pipeline/attributeruler.py
+++ b/spacy/pipeline/attributeruler.py
@@ -78,7 +78,7 @@ class AttributeRuler(Pipe):

        DOCS: https://spacy.io/api/attributeruler#call
        """
-        matches = self.matcher(doc)
+        matches = sorted(self.matcher(doc))

        for match_id, start, end in matches:
            span = Span(doc, start, end, label=match_id)
--- a/spacy/tests/pipeline/test_attributeruler.py
+++ b/spacy/tests/pipeline/test_attributeruler.py
@@ -112,6 +112,28 @@ def test_attributeruler_score(nlp, pattern_dicts):
    assert scores["morph_acc"] == pytest.approx(0.6)


+def test_attributeruler_rule_order(nlp):
+    a = AttributeRuler(nlp.vocab)
+    patterns = [
+        {
+            "patterns": [[{"TAG": "VBZ"}]],
+            "attrs": {"POS": "VERB"},
+        },
+        {
+            "patterns": [[{"TAG": "VBZ"}]],
+            "attrs": {"POS": "NOUN"},
+        },
+    ]
+    a.add_patterns(patterns)
+    doc = get_doc(
+        nlp.vocab,
+        words=["This", "is", "a", "test", "."],
+        tags=["DT", "VBZ", "DT", "NN", "."]
+    )
+    doc = a(doc)
+    assert doc[1].pos_ == "NOUN"
+
+
 def test_attributeruler_tag_map(nlp, tag_map):
    a = AttributeRuler(nlp.vocab)
    a.load_from_tag_map(tag_map)