Improve matcher tests re issue #3328

This commit is contained in:
Matthew Honnibal 2019-02-27 10:25:56 +01:00
parent 8d6954e0e7
commit 2d3ce89b78

View File

@ -4,7 +4,7 @@ from __future__ import unicode_literals
import pytest import pytest
import re import re
from spacy.matcher import Matcher from spacy.matcher import Matcher
from spacy.tokens import Doc from spacy.tokens import Doc, Span
pattern1 = [{"ORTH": "A", "OP": "1"}, {"ORTH": "A", "OP": "*"}] pattern1 = [{"ORTH": "A", "OP": "1"}, {"ORTH": "A", "OP": "*"}]
@ -129,3 +129,17 @@ def test_matcher_end_zero_plus(en_vocab):
assert len(matcher(nlp("a b c"))) == 2 assert len(matcher(nlp("a b c"))) == 2
assert len(matcher(nlp("a b b c"))) == 3 assert len(matcher(nlp("a b b c"))) == 3
assert len(matcher(nlp("a b b"))) == 3 assert len(matcher(nlp("a b b"))) == 3
def test_matcher_sets_return_correct_tokens(en_vocab):
    """Set-membership (``IN``) patterns must report the tokens they matched.

    Three single-token patterns, each testing ``LOWER`` against a
    one-element ``IN`` set, are registered under a single key. After running
    the matcher over "zero one two three", the text of the span rebuilt from
    each returned (label, start, end) triple must be "zero", "one" and "two",
    in that order.
    """
    # One single-token pattern per target word.
    set_patterns = [
        [{"LOWER": {"IN": [word]}}] for word in ("zero", "one", "two")
    ]
    matcher = Matcher(en_vocab)
    matcher.add("TEST", None, *set_patterns)

    doc = Doc(en_vocab, words="zero one two three".split())

    # Rebuild a span from every reported match and collect its text.
    matched_texts = []
    for match_label, start, end in matcher(doc):
        span = Span(doc, start, end, label=match_label)
        matched_texts.append(span.text)

    assert matched_texts == ["zero", "one", "two"]