Commit de1e5f35d5: Merge branch 'develop' into feature/disable-pipes
@@ -230,7 +230,7 @@ cdef class Matcher:
         key (unicode): The match ID.
         RETURNS (bool): Whether the matcher contains rules for this match ID.
         """
-        return key in self._patterns
+        return self._normalize_key(key) in self._patterns

     def add(self, key, on_match, *patterns):
         """Add a match-rule to the matcher. A match-rule consists of: an ID key,
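The change above makes membership checks on Matcher key-agnostic: _normalize_key maps a string key and its hash to the same value before the lookup in self._patterns, so "key in matcher" works whichever form the caller holds. A minimal sketch of the resulting behaviour, assuming the spaCy v2.x API used in this diff (the rule name and pattern are illustrative, not from the commit):

    from spacy.matcher import Matcher
    from spacy.vocab import Vocab

    vocab = Vocab()
    matcher = Matcher(vocab)
    matcher.add('HELLO', None, [{'ORTH': 'hello'}])

    # Both the string key and its hash should now be found, because
    # __contains__ normalizes the key before looking it up.
    assert 'HELLO' in matcher
    assert vocab.strings['HELLO'] in matcher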
@@ -490,7 +490,7 @@ cdef class PhraseMatcher:
         RETURNS (bool): Whether the matcher contains rules for this match ID.
         """
         cdef hash_t ent_id = self.matcher._normalize_key(key)
-        return ent_id in self.phrase_ids
+        return ent_id in self._callbacks

     def __reduce__(self):
         return (self.__class__, (self.vocab,), None, None)
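Here PhraseMatcher.__contains__ switches from phrase_ids to the _callbacks dict, which is keyed by the normalized match ID, so membership reflects every rule registered through add. A short sketch under the same v2.x assumptions (rule names and the single-word phrase are illustrative):

    from spacy.matcher import PhraseMatcher
    from spacy.tokens import Doc
    from spacy.vocab import Vocab

    vocab = Vocab()
    matcher = PhraseMatcher(vocab)
    matcher.add('TEST', None, Doc(vocab, words=['test']))

    # Membership mirrors the keys registered through add().
    assert 'TEST' in matcher
    assert 'TEST2' not in matcher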
@@ -48,7 +48,7 @@ from thinc.neural.util import get_array_module
 from .. import util
 from ..util import get_async, get_cuda_stream
 from .._ml import zero_init, PrecomputableAffine, PrecomputableMaxouts
-from .._ml import Tok2Vec, doc2feats, rebatch, fine_tune
+from .._ml import Tok2Vec, doc2feats, rebatch
 from .._ml import Residual, drop_layer, flatten
 from .._ml import link_vectors_to_models
 from .._ml import HistoryFeatures
@@ -253,7 +253,7 @@ cdef class Parser:
         hist_width = util.env_opt('history_width', cfg.get('hist_width', 0))
         if hist_size != 0:
             raise ValueError("Currently history size is hard-coded to 0")
         if hist_width != 0:
             raise ValueError("Currently history width is hard-coded to 0")
         tok2vec = Tok2Vec(token_vector_width, embed_size,
                           pretrained_dims=cfg.get('pretrained_dims', 0))
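The hist_size and hist_width settings above come through util.env_opt, which lets an environment variable override the value taken from the config. A hedged sketch of that lookup, assuming the v2.x convention that the variable name is the option name upper-cased with a SPACY_ prefix:

    import os
    from spacy import util

    # With no environment override, the passed default (here, from cfg) wins.
    assert util.env_opt('history_width', 0) == 0

    # If set, the environment variable takes precedence over the default.
    os.environ['SPACY_HISTORY_WIDTH'] = '0'
    assert util.env_opt('history_width', 8) == 0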
@@ -413,7 +413,7 @@ cdef class Parser:
         for stcls in state_objs:
             if not stcls.c.is_final():
                 states.push_back(stcls.c)

         feat_weights = state2vec.get_feat_weights()
         cdef int i
         cdef np.ndarray hidden_weights = numpy.ascontiguousarray(vec2scores._layers[-1].W.T)
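The ascontiguousarray call matters here because transposing a NumPy array only swaps strides: W.T is normally not C-contiguous, and the raw pointer handed to the nogil C code below needs a contiguous buffer. A small self-contained illustration (plain NumPy, nothing spaCy-specific):

    import numpy

    W = numpy.zeros((4, 3))
    assert W.flags['C_CONTIGUOUS']
    assert not W.T.flags['C_CONTIGUOUS']      # the transpose is a strided view

    hidden_weights = numpy.ascontiguousarray(W.T)
    assert hidden_weights.flags['C_CONTIGUOUS']  # safe to pass as a float*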
@@ -432,7 +432,7 @@ cdef class Parser:
             PyErr_CheckSignals()
         return state_objs

     cdef void _parseC(self, StateC* state,
                       const float* feat_weights, const float* hW, const float* hb,
                       int nr_class, int nr_hidden, int nr_feat, int nr_piece) nogil:
         token_ids = <int*>calloc(nr_feat, sizeof(int))
@@ -443,7 +443,7 @@ cdef class Parser:
             with gil:
                 PyErr_SetFromErrno(MemoryError)
                 PyErr_CheckSignals()

         while not state.is_final():
             state.set_context_tokens(token_ids, nr_feat)
             memset(vectors, 0, nr_hidden * nr_piece * sizeof(float))
@@ -64,6 +64,12 @@ def test_matcher_init(en_vocab, words):
     assert matcher(doc) == []


+def test_matcher_contains(matcher):
+    matcher.add('TEST', None, [{'ORTH': 'test'}])
+    assert 'TEST' in matcher
+    assert 'TEST2' not in matcher
+
+
 def test_matcher_no_match(matcher):
     words = ["I", "like", "cheese", "."]
     doc = get_doc(matcher.vocab, words)
@@ -112,7 +118,8 @@ def test_matcher_empty_dict(en_vocab):
     matcher.add('A.', None, [{'ORTH': 'a'}, {}])
     matches = matcher(doc)
     assert matches[0][1:] == (0, 2)

+
 def test_matcher_operator_shadow(en_vocab):
     matcher = Matcher(en_vocab)
     abc = ["a", "b", "c"]
@@ -123,7 +130,8 @@ def test_matcher_operator_shadow(en_vocab):
     matches = matcher(doc)
     assert len(matches) == 1
     assert matches[0][1:] == (0, 3)

+
 def test_matcher_phrase_matcher(en_vocab):
     words = ["Google", "Now"]
     doc = get_doc(en_vocab, words)
@@ -134,6 +142,22 @@ def test_matcher_phrase_matcher(en_vocab):
     assert len(matcher(doc)) == 1


+def test_phrase_matcher_length(en_vocab):
+    matcher = PhraseMatcher(en_vocab)
+    assert len(matcher) == 0
+    matcher.add('TEST', None, get_doc(en_vocab, ['test']))
+    assert len(matcher) == 1
+    matcher.add('TEST2', None, get_doc(en_vocab, ['test2']))
+    assert len(matcher) == 2
+
+
+def test_phrase_matcher_contains(en_vocab):
+    matcher = PhraseMatcher(en_vocab)
+    matcher.add('TEST', None, get_doc(en_vocab, ['test']))
+    assert 'TEST' in matcher
+    assert 'TEST2' not in matcher
+
+
 def test_matcher_match_zero(matcher):
     words1 = 'He said , " some words " ...'.split()
     words2 = 'He said , " some three words " ...'.split()