2019-08-20 17:40:25 +03:00
|
|
|
# coding: utf8
|
|
|
|
from __future__ import unicode_literals
|
|
|
|
|
|
|
|
from spacy.matcher import Matcher
|
2019-08-20 17:41:58 +03:00
|
|
|
from spacy.tokens import Doc
|
2019-08-20 17:40:25 +03:00
|
|
|
|
|
|
|
|
|
|
|
def test_issue3879(en_vocab):
    """Regression test for issue 3879: optional pattern tokens must not over-match.

    Builds a five-token doc and runs a three-part pattern over it: an
    optional literal "This", an optional token with no constraints, and a
    required literal "test". Exactly two matches are expected.
    """
    words = ["This", "is", "a", "test", "."]
    doc = Doc(en_vocab, words=words)
    # Sanity check: one token per supplied word.
    assert len(doc) == 5

    optional_this = {"ORTH": "This", "OP": "?"}
    optional_any = {"OP": "?"}
    required_test = {"ORTH": "test"}
    pattern = [optional_this, optional_any, required_test]

    matcher = Matcher(en_vocab)
    matcher.add("TEST", [pattern])

    matches = matcher(doc)
    # Before the fix this failed because of a false-positive match on
    # 'is a test' (presumably "is" was wrongly consumed by the optional
    # "This" slot -- confirm against the issue report).
    assert len(matches) == 2
|