mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-24 17:06:29 +03:00
Sort the AttributeRuler matches by rule order
Sort the returned matches by rule order (the `match_id`) so that the rules are applied in the order they were added. This is necessary, for instance, if the `AttributeRuler` is used for the tag map and later rules require POS tags.
This commit is contained in:
parent
8674b17651
commit
0104bd1600
|
@ -78,7 +78,7 @@ class AttributeRuler(Pipe):
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/attributeruler#call
|
DOCS: https://spacy.io/api/attributeruler#call
|
||||||
"""
|
"""
|
||||||
matches = self.matcher(doc)
|
matches = sorted(self.matcher(doc))
|
||||||
|
|
||||||
for match_id, start, end in matches:
|
for match_id, start, end in matches:
|
||||||
span = Span(doc, start, end, label=match_id)
|
span = Span(doc, start, end, label=match_id)
|
||||||
|
|
|
@ -112,6 +112,28 @@ def test_attributeruler_score(nlp, pattern_dicts):
|
||||||
assert scores["morph_acc"] == pytest.approx(0.6)
|
assert scores["morph_acc"] == pytest.approx(0.6)
|
||||||
|
|
||||||
|
|
||||||
|
def test_attributeruler_rule_order(nlp):
    """Check that overlapping rules are applied in the order they were added.

    Two rules match the same ``VBZ`` token and assign different POS values;
    the value assigned by the rule added last is the one expected to remain.
    """
    ruler = AttributeRuler(nlp.vocab)
    # Both rules share an identical pattern, so only application order
    # decides which POS value the matched token ends up with.
    first_rule = {"patterns": [[{"TAG": "VBZ"}]], "attrs": {"POS": "VERB"}}
    second_rule = {"patterns": [[{"TAG": "VBZ"}]], "attrs": {"POS": "NOUN"}}
    ruler.add_patterns([first_rule, second_rule])

    words = ["This", "is", "a", "test", "."]
    tags = ["DT", "VBZ", "DT", "NN", "."]
    doc = get_doc(nlp.vocab, words=words, tags=tags)
    doc = ruler(doc)

    # Token 1 ("is") is the only VBZ token; the second rule must win.
    assert doc[1].pos_ == "NOUN"
|
||||||
|
|
||||||
|
|
||||||
def test_attributeruler_tag_map(nlp, tag_map):
|
def test_attributeruler_tag_map(nlp, tag_map):
|
||||||
a = AttributeRuler(nlp.vocab)
|
a = AttributeRuler(nlp.vocab)
|
||||||
a.load_from_tag_map(tag_map)
|
a.load_from_tag_map(tag_map)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user