From fd72b8b2824cd767ba20e7a747fc2c8ccf180db6 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Thu, 1 Oct 2015 16:21:00 +1000
Subject: [PATCH] * Add a test for Issue #118: Matcher behaves unpredictably with overlapping entities

---
 tests/matcher/test_matcher_bugfixes.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)
 create mode 100644 tests/matcher/test_matcher_bugfixes.py

diff --git a/tests/matcher/test_matcher_bugfixes.py b/tests/matcher/test_matcher_bugfixes.py
new file mode 100644
index 000000000..c768021db
--- /dev/null
+++ b/tests/matcher/test_matcher_bugfixes.py
@@ -0,0 +1,19 @@
+import pytest
+
+
+from spacy.matcher import Matcher
+
+def test_overlap_issue118(EN):
+    '''Regression test for Issue #118: overlapping Matcher patterns ("boston celtics" and "celtics") should give one ORG match and one entity.'''
+    doc = EN.tokenizer(u'how many points did lebron james score against the boston celtics last night')
+    ORG = doc.vocab.strings['ORG']  # string-store value for 'ORG'; used as the expected label below
+    matcher = Matcher(EN.vocab, {'BostonCeltics': ('ORG', {}, [[{'lower': 'boston'}, {'lower': 'celtics'}], [{'lower': 'celtics'}]])})
+    # The 'celtics'-only pattern overlaps the two-token pattern; a single two-token match is expected, not two overlapping ones.
+    matches = matcher(doc)
+    assert matches == [(ORG, 9, 11)]  # presumably 'boston celtics' with an exclusive end index -- TODO confirm
+    ents = list(doc.ents)  # doc came from the tokenizer only, so the test expects the matcher call to have set doc.ents
+    assert len(ents) == 1
+    assert ents[0].label == ORG
+    assert ents[0].start == 9
+    assert ents[0].end == 11
+