From d8391b1c4d344f12c89d78bce64779b24b35d658 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Fri, 20 Oct 2017 16:49:36 +0200
Subject: [PATCH] Fix #1434: Matcher failed on ending ? if no token

---
NOTE(review): this patch was recovered from a copy in which every line break
and all leading indentation had been collapsed. Line structure is restored
from the hunk headers; the indentation of context lines is inferred from
Python block structure. Confirm it against spacy/matcher.pyx, or apply with
`git apply --ignore-whitespace`.

 spacy/matcher.pyx                        |  2 +-
 spacy/tests/regression/test_issue1434.py | 22 ++++++++++++++++++++++
 2 files changed, 23 insertions(+), 1 deletion(-)
 create mode 100644 spacy/tests/regression/test_issue1434.py

diff --git a/spacy/matcher.pyx b/spacy/matcher.pyx
index 24d0a9836..fa67f32d6 100644
--- a/spacy/matcher.pyx
+++ b/spacy/matcher.pyx
@@ -391,7 +391,7 @@ cdef class Matcher:
                     matches.append((ent_id, start, end))
         # Look for open patterns that are actually satisfied
         for state in partials:
-            while state.second.quantifier in (ZERO, ZERO_PLUS):
+            while state.second.quantifier in (ZERO, ZERO_ONE, ZERO_PLUS):
                 state.second += 1
                 if state.second.nr_attr == 0:
                     start = state.first
diff --git a/spacy/tests/regression/test_issue1434.py b/spacy/tests/regression/test_issue1434.py
new file mode 100644
index 000000000..ec3a34bb0
--- /dev/null
+++ b/spacy/tests/regression/test_issue1434.py
@@ -0,0 +1,22 @@
+from __future__ import unicode_literals
+
+from spacy.tokens import Doc
+from spacy.vocab import Vocab
+from spacy.matcher import Matcher
+from spacy.lang.lex_attrs import LEX_ATTRS
+
+
+def test_issue1434():
+    '''Test matches occur when optional element at end of short doc'''
+    vocab = Vocab(lex_attr_getters=LEX_ATTRS)
+    hello_world = Doc(vocab, words=['Hello', 'World'])
+    hello = Doc(vocab, words=['Hello'])
+
+    matcher = Matcher(vocab)
+    matcher.add('MyMatcher', None,
+        [ {'ORTH': 'Hello' }, {'IS_ALPHA': True, 'OP': '?'} ])
+
+    matches = matcher(hello_world)
+    assert matches
+    matches = matcher(hello)
+    assert matches