spaCy/spacy/tests/regression/test_issue3879.py

15 lines
463 B
Python
Raw Normal View History

# coding: utf8
from __future__ import unicode_literals
from spacy.matcher import Matcher
2019-08-20 17:41:58 +03:00
from spacy.tokens import Doc
def test_issue3879(en_vocab):
    """Regression test for issue 3879: no false-positive Matcher result.

    Builds a five-token ``Doc`` and matches it against a pattern made of two
    optional tokens followed by a required ``"test"`` token. The matcher must
    report exactly two matches.

    ``en_vocab`` is supplied by the caller (presumably a pytest fixture
    providing an English ``Vocab`` -- confirm against the test suite's
    conftest).
    """
    doc = Doc(en_vocab, words=["This", "is", "a", "test", "."])
    assert len(doc) == 5
    # First token: optional, text must be "This". Second token: optional,
    # unconstrained. Third token: required, text must be "test".
    pattern = [{"ORTH": "This", "OP": "?"}, {"OP": "?"}, {"ORTH": "test"}]
    matcher = Matcher(en_vocab)
    matcher.add("TEST", None, pattern)
    # A false-positive match on 'is a test' would push the count above two
    # and make this assertion fail.
    assert len(matcher(doc)) == 2