mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 09:14:32 +03:00
Sort the AttributeRuler matches by rule order
Sort the returned matches by rule order (the `match_id`) so that the rules are applied in the order they were added. This is necessary, for instance, if the `AttributeRuler` is used for the tag map and later rules require POS tags.
This commit is contained in:
parent
8674b17651
commit
0104bd1600
|
@ -78,7 +78,7 @@ class AttributeRuler(Pipe):
|
|||
|
||||
DOCS: https://spacy.io/api/attributeruler#call
|
||||
"""
|
||||
matches = self.matcher(doc)
|
||||
matches = sorted(self.matcher(doc))
|
||||
|
||||
for match_id, start, end in matches:
|
||||
span = Span(doc, start, end, label=match_id)
|
||||
|
|
|
@ -112,6 +112,28 @@ def test_attributeruler_score(nlp, pattern_dicts):
|
|||
assert scores["morph_acc"] == pytest.approx(0.6)
|
||||
|
||||
|
||||
def test_attributeruler_rule_order(nlp):
    """Check that a later-added rule wins over an earlier one.

    Two rules match the same ``VBZ`` token but assign different POS values.
    The value from the rule that was added last is expected on the token
    after the ruler has been run.
    """
    # Identical match patterns, conflicting attributes.
    verb_rule = {"patterns": [[{"TAG": "VBZ"}]], "attrs": {"POS": "VERB"}}
    noun_rule = {"patterns": [[{"TAG": "VBZ"}]], "attrs": {"POS": "NOUN"}}

    ruler = AttributeRuler(nlp.vocab)
    ruler.add_patterns([verb_rule, noun_rule])

    words = ["This", "is", "a", "test", "."]
    tags = ["DT", "VBZ", "DT", "NN", "."]
    doc = ruler(get_doc(nlp.vocab, words=words, tags=tags))

    # Token 1 ("is") is the only VBZ token; the second rule must prevail.
    assert doc[1].pos_ == "NOUN"
|
||||
|
||||
|
||||
def test_attributeruler_tag_map(nlp, tag_map):
|
||||
a = AttributeRuler(nlp.vocab)
|
||||
a.load_from_tag_map(tag_map)
|
||||
|
|
Loading…
Reference in New Issue
Block a user