From 91beacf5e327a5898935050ff8fdb9b9d9268821 Mon Sep 17 00:00:00 2001 From: ines Date: Wed, 25 Oct 2017 16:19:38 +0200 Subject: [PATCH 1/2] Fix Matcher.__contains__ --- spacy/matcher.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/matcher.pyx b/spacy/matcher.pyx index 6c1069578..fd4a8026a 100644 --- a/spacy/matcher.pyx +++ b/spacy/matcher.pyx @@ -230,7 +230,7 @@ cdef class Matcher: key (unicode): The match ID. RETURNS (bool): Whether the matcher contains rules for this match ID. """ - return key in self._patterns + return self._normalize_key(key) in self._patterns def add(self, key, on_match, *patterns): """Add a match-rule to the matcher. A match-rule consists of: an ID key, From c0b55ebdac8196f4432a381a1ad39d7746d19ded Mon Sep 17 00:00:00 2001 From: ines Date: Wed, 25 Oct 2017 16:31:11 +0200 Subject: [PATCH 2/2] Fix PhraseMatcher.__contains__ and add more tests --- spacy/matcher.pyx | 2 +- spacy/tests/test_matcher.py | 28 ++++++++++++++++++++++++++-- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/spacy/matcher.pyx b/spacy/matcher.pyx index fd4a8026a..401405c14 100644 --- a/spacy/matcher.pyx +++ b/spacy/matcher.pyx @@ -490,7 +490,7 @@ cdef class PhraseMatcher: RETURNS (bool): Whether the matcher contains rules for this match ID. """ cdef hash_t ent_id = self.matcher._normalize_key(key) - return ent_id in self.phrase_ids + return ent_id in self._callbacks def __reduce__(self): return (self.__class__, (self.vocab,), None, None) diff --git a/spacy/tests/test_matcher.py b/spacy/tests/test_matcher.py index 5b08ede39..8210467ea 100644 --- a/spacy/tests/test_matcher.py +++ b/spacy/tests/test_matcher.py @@ -64,6 +64,12 @@ def test_matcher_init(en_vocab, words): assert matcher(doc) == [] +def test_matcher_contains(matcher): + matcher.add('TEST', None, [{'ORTH': 'test'}]) + assert 'TEST' in matcher + assert 'TEST2' not in matcher + + def test_matcher_no_match(matcher): words = ["I", "like", "cheese", "."] doc = get_doc(matcher.vocab, words) @@ -112,7 +118,8 @@ def test_matcher_empty_dict(en_vocab): matcher.add('A.', None, [{'ORTH': 'a'}, {}]) matches = matcher(doc) assert matches[0][1:] == (0, 2) - + + def test_matcher_operator_shadow(en_vocab): matcher = Matcher(en_vocab) abc = ["a", "b", "c"] @@ -123,7 +130,8 @@ def test_matcher_operator_shadow(en_vocab): matches = matcher(doc) assert len(matches) == 1 assert matches[0][1:] == (0, 3) - + + def test_matcher_phrase_matcher(en_vocab): words = ["Google", "Now"] doc = get_doc(en_vocab, words) @@ -134,6 +142,22 @@ def test_matcher_phrase_matcher(en_vocab): assert len(matcher(doc)) == 1 +def test_phrase_matcher_length(en_vocab): + matcher = PhraseMatcher(en_vocab) + assert len(matcher) == 0 + matcher.add('TEST', None, get_doc(en_vocab, ['test'])) + assert len(matcher) == 1 + matcher.add('TEST2', None, get_doc(en_vocab, ['test2'])) + assert len(matcher) == 2 + + +def test_phrase_matcher_contains(en_vocab): + matcher = PhraseMatcher(en_vocab) + matcher.add('TEST', None, get_doc(en_vocab, ['test'])) + assert 'TEST' in matcher + assert 'TEST2' not in matcher + + def test_matcher_match_zero(matcher): words1 = 'He said , " some words " ...'.split() words2 = 'He said , " some three words " ...'.split()