mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 01:46:28 +03:00
parent
ea07f3022e
commit
414a69b736
23
spacy/tests/regression/test_issue1971.py
Normal file
23
spacy/tests/regression/test_issue1971.py
Normal file
|
@ -0,0 +1,23 @@
|
|||
# coding: utf8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from spacy.matcher import Matcher
|
||||
from spacy.tokens import Token, Doc
|
||||
|
||||
|
||||
def test_issue1971(en_vocab):
    """Regression test for issue #1971.

    Adds a pattern made of one required token followed by several optional
    ones (one of them keyed on the custom ``_.optional`` extension attribute),
    runs the matcher over a short doc, and checks that every match ID it
    returns is a string present in the vocab's string store.
    """
    # Possibly related to #2675 and #2671?
    matcher = Matcher(en_vocab)
    pattern = [
        {"ORTH": "Doe"},
        {"ORTH": "!", "OP": "?"},
        {"_": {"optional": True}, "OP": "?"},
        {"ORTH": "!", "OP": "?"},
    ]
    # The extension is registered on the Token class itself, so it outlives
    # this test. force=True keeps the call from raising if "optional" has
    # already been registered earlier in the same process (another test, or
    # a re-run of this one).
    Token.set_extension("optional", default=False, force=True)
    matcher.add("TEST", None, pattern)
    doc = Doc(en_vocab, words=["Hello", "John", "Doe", "!"])
    matches = matcher(doc)
    # We could also assert length 1 here, but this is more conclusive, because
    # the real problem here is that it returns a duplicate match for a match_id
    # that's not actually in the vocab!
    assert all(match_id in en_vocab.strings for match_id, start, end in matches)
|
Loading…
Reference in New Issue
Block a user