Sort the AttributeRuler matches by rule order

Sort the returned matches by rule order (the `match_id`) so that the
rules are applied in the order they were added. This is necessary, for
instance, if the `AttributeRuler` is used for the tag map and later
rules require POS tags.
This commit is contained in:
Adriane Boyd 2020-08-28 20:45:19 +02:00
parent 8674b17651
commit 0104bd1600
2 changed files with 23 additions and 1 deletion

View File

@ -78,7 +78,7 @@ class AttributeRuler(Pipe):
DOCS: https://spacy.io/api/attributeruler#call
"""
matches = self.matcher(doc)
matches = sorted(self.matcher(doc))
for match_id, start, end in matches:
span = Span(doc, start, end, label=match_id)

View File

@ -112,6 +112,28 @@ def test_attributeruler_score(nlp, pattern_dicts):
assert scores["morph_acc"] == pytest.approx(0.6)
def test_attributeruler_rule_order(nlp):
    """Check that overlapping rules are applied in the order they were added.

    Both rules use the same pattern and therefore match the same ``VBZ``
    token, so the POS value set by the rule added last ("NOUN") must be the
    one that remains on the token.
    """
    # Two rules with an identical pattern but conflicting POS values.
    verb_rule = {"patterns": [[{"TAG": "VBZ"}]], "attrs": {"POS": "VERB"}}
    noun_rule = {"patterns": [[{"TAG": "VBZ"}]], "attrs": {"POS": "NOUN"}}
    words = ["This", "is", "a", "test", "."]
    tags = ["DT", "VBZ", "DT", "NN", "."]

    ruler = AttributeRuler(nlp.vocab)
    ruler.add_patterns([verb_rule, noun_rule])
    doc = ruler(get_doc(nlp.vocab, words=words, tags=tags))

    # "is" (index 1) is the only VBZ token; the later rule overrides the earlier.
    assert doc[1].pos_ == "NOUN"
def test_attributeruler_tag_map(nlp, tag_map):
a = AttributeRuler(nlp.vocab)
a.load_from_tag_map(tag_map)