Mirror of https://github.com/explosion/spaCy.git
	Merge branch 'develop' into feature/disable-pipes
Commit: de1e5f35d5
@@ -230,7 +230,7 @@ cdef class Matcher:
         key (unicode): The match ID.
         RETURNS (bool): Whether the matcher contains rules for this match ID.
         """
-        return key in self._patterns
+        return self._normalize_key(key) in self._patterns
 
     def add(self, key, on_match, *patterns):
         """Add a match-rule to the matcher. A match-rule consists of: an ID key,
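The `Matcher.__contains__` hunk above fixes membership checks by hashing the key before the lookup: `add()` stores patterns under normalized (hashed) keys, so a raw string key could never match directly. A minimal sketch of the normalize-before-lookup pattern; `TinyMatcher` and its use of Python's `hash()` are illustrative stand-ins, not spaCy's internals:

```python
# Minimal sketch of normalize-before-lookup; the hash() call stands in
# for spaCy's string-store normalization and is an assumption here.
class TinyMatcher(object):
    def __init__(self):
        self._patterns = {}

    def _normalize_key(self, key):
        return hash(key) if isinstance(key, str) else key

    def add(self, key, patterns):
        self._patterns[self._normalize_key(key)] = patterns

    def __contains__(self, key):
        # without normalization, 'TEST' would be compared against hashes
        return self._normalize_key(key) in self._patterns


matcher = TinyMatcher()
matcher.add('TEST', [{'ORTH': 'test'}])
assert 'TEST' in matcher and 'TEST2' not in matcher
```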
@@ -490,7 +490,7 @@ cdef class PhraseMatcher:
         RETURNS (bool): Whether the matcher contains rules for this match ID.
         """
         cdef hash_t ent_id = self.matcher._normalize_key(key)
-        return ent_id in self.phrase_ids
+        return ent_id in self._callbacks
 
     def __reduce__(self):
         return (self.__class__, (self.vocab,), None, None)
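The `PhraseMatcher` side of the fix checks `self._callbacks`, a dict keyed by the normalized match ID, instead of `phrase_ids`: every key ever passed to `add()` has an entry there, even when its callback is `None`, and dict membership is O(1). A simplified sketch of that invariant; the bookkeeping shown is an assumption, not the real Cython class:

```python
# Simplified sketch: membership delegates to the per-key callback dict,
# so an added key is found even when its callback is None. This is an
# assumption-level model of PhraseMatcher, not its real Cython code.
class TinyPhraseMatcher(object):
    def __init__(self):
        self._callbacks = {}

    def add(self, key, on_match, *docs):
        self._callbacks[key] = on_match   # None is a valid callback

    def __len__(self):
        return len(self._callbacks)

    def __contains__(self, key):
        return key in self._callbacks


matcher = TinyPhraseMatcher()
matcher.add('TEST', None, ['test'])
assert 'TEST' in matcher and 'TEST2' not in matcher
```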
@@ -48,7 +48,7 @@ from thinc.neural.util import get_array_module
 from .. import util
 from ..util import get_async, get_cuda_stream
 from .._ml import zero_init, PrecomputableAffine, PrecomputableMaxouts
-from .._ml import Tok2Vec, doc2feats, rebatch, fine_tune
+from .._ml import Tok2Vec, doc2feats, rebatch
 from .._ml import Residual, drop_layer, flatten
 from .._ml import link_vectors_to_models
 from .._ml import HistoryFeatures
@@ -253,7 +253,7 @@ cdef class Parser:
         hist_width = util.env_opt('history_width', cfg.get('hist_width', 0))
         if hist_size != 0:
             raise ValueError("Currently history size is hard-coded to 0")
-        if hist_width != 0: 
+        if hist_width != 0:
             raise ValueError("Currently history width is hard-coded to 0")
         tok2vec = Tok2Vec(token_vector_width, embed_size,
                           pretrained_dims=cfg.get('pretrained_dims', 0))
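In the hunk above, `hist_size` and `hist_width` come from `util.env_opt`, which lets an environment variable override the config value before the hard-coded-to-0 check runs. A rough sketch of such a helper; the `SPACY_` prefix and the type coercion are assumptions about the lookup rules, not a copy of spaCy's implementation:

```python
# Rough sketch of an env-overrides-config helper in the spirit of
# util.env_opt; the SPACY_ prefix and the type coercion are assumptions.
import os

def env_opt(name, default=None):
    env_name = 'SPACY_' + name.upper()   # e.g. history_width -> SPACY_HISTORY_WIDTH
    if env_name not in os.environ:
        return default
    raw = os.environ[env_name]
    # coerce to the default's type so '0' from the environment equals 0
    return type(default)(raw) if default is not None else raw

hist_width = env_opt('history_width', 0)   # 0 unless overridden externally
```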
@@ -413,7 +413,7 @@ cdef class Parser:
         for stcls in state_objs:
             if not stcls.c.is_final():
                 states.push_back(stcls.c)
-                 
+
         feat_weights = state2vec.get_feat_weights()
         cdef int i
         cdef np.ndarray hidden_weights = numpy.ascontiguousarray(vec2scores._layers[-1].W.T)
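A detail worth noting in the context lines: `vec2scores._layers[-1].W.T` is a transposed view, and the parser's nogil loop reads the weights through a raw `float*`, so `numpy.ascontiguousarray` is needed to materialize a C-contiguous copy. The NumPy behavior is easy to check:

```python
# A transpose is a strided view, not a copy; the nogil C loop indexes the
# weights as a flat float* buffer, so it needs genuine C-contiguity.
import numpy

W = numpy.zeros((128, 64), dtype='float32')
assert not W.T.flags['C_CONTIGUOUS']       # view with swapped strides
hW = numpy.ascontiguousarray(W.T)          # materializes a contiguous copy
assert hW.flags['C_CONTIGUOUS']            # safe to hand to nogil C code
```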
@@ -432,7 +432,7 @@ cdef class Parser:
         PyErr_CheckSignals()
         return state_objs
 
-    cdef void _parseC(self, StateC* state, 
+    cdef void _parseC(self, StateC* state,
             const float* feat_weights, const float* hW, const float* hb,
             int nr_class, int nr_hidden, int nr_feat, int nr_piece) nogil:
         token_ids = <int*>calloc(nr_feat, sizeof(int))
@@ -443,7 +443,7 @@ cdef class Parser:
             with gil:
                 PyErr_SetFromErrno(MemoryError)
                 PyErr_CheckSignals()
-         
+
         while not state.is_final():
             state.set_context_tokens(token_ids, nr_feat)
             memset(vectors, 0, nr_hidden * nr_piece * sizeof(float))
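`_parseC` is the hot loop that runs with the GIL released, which is why the hunks around it allocate plain C buffers with `calloc` and clear them per step with `memset` instead of touching Python objects. In pure Python, the control flow it implements looks roughly like this; `state`, `score_actions`, and `valid_actions` are hypothetical stand-ins:

```python
# Pure-Python sketch of the greedy transition loop that _parseC implements
# in nogil Cython; state, score_actions and valid_actions are hypothetical.
def greedy_parse(state, score_actions, valid_actions):
    """Apply the best-scoring valid transition until the parse is final."""
    while not state.is_final():
        scores = score_actions(state)      # features -> hidden -> class scores
        action = max(valid_actions(state), key=lambda a: scores[a])
        state.apply(action)
    return state
```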
@@ -64,6 +64,12 @@ def test_matcher_init(en_vocab, words):
     assert matcher(doc) == []
 
 
+def test_matcher_contains(matcher):
+    matcher.add('TEST', None, [{'ORTH': 'test'}])
+    assert 'TEST' in matcher
+    assert 'TEST2' not in matcher
+
+
 def test_matcher_no_match(matcher):
     words = ["I", "like", "cheese", "."]
     doc = get_doc(matcher.vocab, words)
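The new `test_matcher_contains` drives the `__contains__` fix through the public `add(key, on_match, *patterns)` API with `on_match=None`. For completeness, a hedged sketch of what a non-`None` callback receives; the `(matcher, doc, i, matches)` signature is the v2-era contract, while the body is purely illustrative:

```python
# Hedged sketch of a non-None on_match callback; the (matcher, doc, i,
# matches) signature is the v2-era contract, the body is illustrative.
from spacy.matcher import Matcher
from spacy.vocab import Vocab

def print_match(matcher, doc, i, matches):
    match_id, start, end = matches[i]
    print('Matched span:', doc[start:end].text)

matcher = Matcher(Vocab())
matcher.add('TEST', print_match, [{'ORTH': 'test'}])
```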
@@ -112,7 +118,8 @@ def test_matcher_empty_dict(en_vocab):
     matcher.add('A.', None, [{'ORTH': 'a'}, {}])
     matches = matcher(doc)
     assert matches[0][1:] == (0, 2)
-  
+
+
 def test_matcher_operator_shadow(en_vocab):
     matcher = Matcher(en_vocab)
     abc = ["a", "b", "c"]
@@ -123,7 +130,8 @@ def test_matcher_operator_shadow(en_vocab):
     matches = matcher(doc)
     assert len(matches) == 1
     assert matches[0][1:] == (0, 3)
-  
+
+
 def test_matcher_phrase_matcher(en_vocab):
     words = ["Google", "Now"]
     doc = get_doc(en_vocab, words)
@@ -134,6 +142,22 @@ def test_matcher_phrase_matcher(en_vocab):
     assert len(matcher(doc)) == 1
 
 
+def test_phrase_matcher_length(en_vocab):
+    matcher = PhraseMatcher(en_vocab)
+    assert len(matcher) == 0
+    matcher.add('TEST', None, get_doc(en_vocab, ['test']))
+    assert len(matcher) == 1
+    matcher.add('TEST2', None, get_doc(en_vocab, ['test2']))
+    assert len(matcher) == 2
+
+
+def test_phrase_matcher_contains(en_vocab):
+    matcher = PhraseMatcher(en_vocab)
+    matcher.add('TEST', None, get_doc(en_vocab, ['test']))
+    assert 'TEST' in matcher
+    assert 'TEST2' not in matcher
+
+
 def test_matcher_match_zero(matcher):
     words1 = 'He said , " some words " ...'.split()
     words2 = 'He said , " some three words " ...'.split()
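Together, `test_phrase_matcher_length` and `test_phrase_matcher_contains` pin down dict-like `__len__`/`__contains__` behavior over match IDs. A hedged end-to-end sketch of the same behavior on real text, assuming spaCy v2 and that the `en_core_web_sm` model is installed:

```python
# End-to-end sketch; assumes spaCy v2 and that en_core_web_sm is installed.
import spacy
from spacy.matcher import PhraseMatcher

nlp = spacy.load('en_core_web_sm')
matcher = PhraseMatcher(nlp.vocab)
matcher.add('GOOGLE_NOW', None, nlp(u'Google Now'))
assert len(matcher) == 1 and 'GOOGLE_NOW' in matcher

doc = nlp(u'I asked Google Now for directions.')
for match_id, start, end in matcher(doc):
    print(nlp.vocab.strings[match_id], doc[start:end].text)
```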