Merge branch 'develop' into feature/disable-pipes

2025-10-29 23:17:59 +03:00 · 2017-10-25 16:33:12 +02:00 · 2017-10-25 16:33:12 +02:00 · de1e5f35d5
commit de1e5f35d5
parent 657a4d91bc c0b55ebdac
3 changed files with 33 additions and 9 deletions
--- a/spacy/matcher.pyx
+++ b/spacy/matcher.pyx
@@ -230,7 +230,7 @@ cdef class Matcher:
        key (unicode): The match ID.
        RETURNS (bool): Whether the matcher contains rules for this match ID.
        """
-        return key in self._patterns
+        return self._normalize_key(key) in self._patterns

    def add(self, key, on_match, *patterns):
        """Add a match-rule to the matcher. A match-rule consists of: an ID key,
@@ -490,7 +490,7 @@ cdef class PhraseMatcher:
        RETURNS (bool): Whether the matcher contains rules for this match ID.
        """
        cdef hash_t ent_id = self.matcher._normalize_key(key)
-        return ent_id in self.phrase_ids
+        return ent_id in self._callbacks

    def __reduce__(self):
        return (self.__class__, (self.vocab,), None, None)
--- a/spacy/syntax/nn_parser.pyx
+++ b/spacy/syntax/nn_parser.pyx
@@ -48,7 +48,7 @@ from thinc.neural.util import get_array_module
 from .. import util
 from ..util import get_async, get_cuda_stream
 from .._ml import zero_init, PrecomputableAffine, PrecomputableMaxouts
-from .._ml import Tok2Vec, doc2feats, rebatch, fine_tune
+from .._ml import Tok2Vec, doc2feats, rebatch
 from .._ml import Residual, drop_layer, flatten
 from .._ml import link_vectors_to_models
 from .._ml import HistoryFeatures
@@ -253,7 +253,7 @@ cdef class Parser:
        hist_width = util.env_opt('history_width', cfg.get('hist_width', 0))
        if hist_size != 0:
            raise ValueError("Currently history size is hard-coded to 0")
-        if hist_width != 0: 
+        if hist_width != 0:
            raise ValueError("Currently history width is hard-coded to 0")
        tok2vec = Tok2Vec(token_vector_width, embed_size,
                          pretrained_dims=cfg.get('pretrained_dims', 0))
@@ -413,7 +413,7 @@ cdef class Parser:
        for stcls in state_objs:
            if not stcls.c.is_final():
                states.push_back(stcls.c)
-                
+
        feat_weights = state2vec.get_feat_weights()
        cdef int i
        cdef np.ndarray hidden_weights = numpy.ascontiguousarray(vec2scores._layers[-1].W.T)
@@ -432,7 +432,7 @@ cdef class Parser:
        PyErr_CheckSignals()
        return state_objs

-    cdef void _parseC(self, StateC* state, 
+    cdef void _parseC(self, StateC* state,
            const float* feat_weights, const float* hW, const float* hb,
            int nr_class, int nr_hidden, int nr_feat, int nr_piece) nogil:
        token_ids = <int*>calloc(nr_feat, sizeof(int))
@@ -443,7 +443,7 @@ cdef class Parser:
            with gil:
                PyErr_SetFromErrno(MemoryError)
                PyErr_CheckSignals()
-        
+
        while not state.is_final():
            state.set_context_tokens(token_ids, nr_feat)
            memset(vectors, 0, nr_hidden * nr_piece * sizeof(float))
--- a/spacy/tests/test_matcher.py
+++ b/spacy/tests/test_matcher.py
@@ -64,6 +64,12 @@ def test_matcher_init(en_vocab, words):
    assert matcher(doc) == []


+def test_matcher_contains(matcher):
+    matcher.add('TEST', None, [{'ORTH': 'test'}])
+    assert 'TEST' in matcher
+    assert 'TEST2' not in matcher
+
+
 def test_matcher_no_match(matcher):
    words = ["I", "like", "cheese", "."]
    doc = get_doc(matcher.vocab, words)
@@ -112,7 +118,8 @@ def test_matcher_empty_dict(en_vocab):
    matcher.add('A.', None, [{'ORTH': 'a'}, {}])
    matches = matcher(doc)
    assert matches[0][1:] == (0, 2)
- 
+
+
 def test_matcher_operator_shadow(en_vocab):
    matcher = Matcher(en_vocab)
    abc = ["a", "b", "c"]
@@ -123,7 +130,8 @@ def test_matcher_operator_shadow(en_vocab):
    matches = matcher(doc)
    assert len(matches) == 1
    assert matches[0][1:] == (0, 3)
- 
+
+
 def test_matcher_phrase_matcher(en_vocab):
    words = ["Google", "Now"]
    doc = get_doc(en_vocab, words)
@@ -134,6 +142,22 @@ def test_matcher_phrase_matcher(en_vocab):
    assert len(matcher(doc)) == 1


+def test_phrase_matcher_length(en_vocab):
+    matcher = PhraseMatcher(en_vocab)
+    assert len(matcher) == 0
+    matcher.add('TEST', None, get_doc(en_vocab, ['test']))
+    assert len(matcher) == 1
+    matcher.add('TEST2', None, get_doc(en_vocab, ['test2']))
+    assert len(matcher) == 2
+
+
+def test_phrase_matcher_contains(en_vocab):
+    matcher = PhraseMatcher(en_vocab)
+    matcher.add('TEST', None, get_doc(en_vocab, ['test']))
+    assert 'TEST' in matcher
+    assert 'TEST2' not in matcher
+
+
 def test_matcher_match_zero(matcher):
    words1 = 'He said , " some words " ...'.split()
    words2 = 'He said , " some three words " ...'.split()