diff --git a/spacy/syntax/_parser_model.pxd b/spacy/syntax/_parser_model.pxd index 6e8321c99..38f2f0e4c 100644 --- a/spacy/syntax/_parser_model.pxd +++ b/spacy/syntax/_parser_model.pxd @@ -15,11 +15,11 @@ cdef struct SizesC: cdef struct WeightsC: - float* feat_weights - float* feat_bias - float* hidden_bias - float* hidden_weights - float* vectors + const float* feat_weights + const float* feat_bias + const float* hidden_bias + const float* hidden_weights + const float* vectors cdef struct ActivationsC: @@ -33,7 +33,9 @@ cdef struct ActivationsC: int _max_size -cdef WeightsC get_c_weights(model) +cdef WeightsC get_c_weights(model) except * + +cdef SizesC get_c_sizes(model, int batch_size) except * cdef void resize_activations(ActivationsC* A, SizesC n) nogil diff --git a/spacy/syntax/_parser_model.pyx b/spacy/syntax/_parser_model.pyx index f98e24c42..c7ec8cc6c 100644 --- a/spacy/syntax/_parser_model.pyx +++ b/spacy/syntax/_parser_model.pyx @@ -37,29 +37,56 @@ from ..errors import Errors, TempErrors from .. import util from .stateclass cimport StateClass from .transition_system cimport Transition -from . import _beam_utils, nonproj +from . import nonproj -cdef WeightsC get_c_weights(model): +cdef WeightsC get_c_weights(model) except *: cdef WeightsC output + cdef precompute_hiddens state2vec = model.state2vec + output.feat_weights = state2vec.get_feat_weights() + output.feat_bias = state2vec.bias.data + cdef np.ndarray vec2scores_W = model.vec2scores.W + cdef np.ndarray vec2scores_b = model.vec2scores.b + output.hidden_weights = vec2scores_W.data + output.hidden_bias = vec2scores_b.data + cdef np.ndarray tokvecs = model.tokvecs + output.vectors = tokvecs.data return output + +cdef SizesC get_c_sizes(model, int batch_size) except *: + cdef SizesC output + output.states = batch_size + output.classes = model.nO + output.hiddens = model.nH + output.pieces = model.nP + output.feats = model.nF + output.embed_width = model.nI + return output + + cdef void resize_activations(ActivationsC* A, SizesC n) nogil: - if n.states < A._max_size: + if n.states <= A._max_size: A._curr_size = n.states return if A._max_size == 0: A.token_ids = calloc(n.states * n.feats, sizeof(A.token_ids[0])) A.vectors = calloc(n.states * n.hiddens, sizeof(A.hiddens[0])) A.scores = calloc(n.states * n.classes, sizeof(A.scores[0])) - A.unmaxed = calloc(n.states * n.hiddens, sizeof(A.unmaxed[0])) + A.unmaxed = calloc(n.states * n.hiddens * n.feats, sizeof(A.unmaxed[0])) A.is_valid = calloc(n.states * n.classes, sizeof(A.is_valid[0])) + A._max_size = n.states else: - A.token_ids = realloc(A.token_ids, n.states * n.feats * sizeof(A.token_ids[0])) - A.vectors = realloc(A.token_ids, n.states * n.embed_width * sizeof(A.vectors[0])) - A.scores = realloc(A.scores, n.states * n.classes * sizeof(A.scores[0])) - A.unmaxed = realloc(A.unmaxed, n.states * n.hiddens * sizeof(A.unmaxed[0])) - A.is_valid = realloc(A.is_valid, n.states * n.classes * sizeof(A.is_valid[0])) + A.token_ids = realloc(A.token_ids, + n.states * n.feats * sizeof(A.token_ids[0])) + A.vectors = realloc(A.vectors, + n.states * n.embed_width * sizeof(A.vectors[0])) + A.scores = realloc(A.scores, + n.states * n.classes * sizeof(A.scores[0])) + A.unmaxed = realloc(A.unmaxed, + n.states * n.hiddens * n.feats * sizeof(A.unmaxed[0])) + A.is_valid = realloc(A.is_valid, + n.states * n.classes * sizeof(A.is_valid[0])) A._max_size = n.states A._curr_size = n.states @@ -165,8 +192,28 @@ class ParserModel(Model): Model.__init__(self) self._layers = [tok2vec, lower_model, upper_model] + @property + def nO(self): + return self._layers[-1].nO + + @property + def nI(self): + return self._layers[1].nI + + @property + def nH(self): + return self._layers[1].nO + + @property + def nF(self): + return self._layers[1].nF + + @property + def nP(self): + return self._layers[1].nP + def begin_update(self, docs, drop=0.): - step_model = ParserStepModel(docs, self.layers, drop=drop) + step_model = ParserStepModel(docs, self._layers, drop=drop) def finish_parser_update(golds, sgd=None): step_model.make_updates(sgd) return None @@ -202,7 +249,7 @@ class ParserStepModel(Model): def backprop_parser_step(d_scores, sgd=None): d_vector = bp_dropout(get_d_vector(d_scores, sgd=sgd)) - if isinstance(self.model[0].ops, CupyOps) \ + if isinstance(self.ops, CupyOps) \ and not isinstance(token_ids, self.state2vec.ops.xp.ndarray): # Move token_ids and d_vector to GPU, asynchronously self.backprops.append(( @@ -217,7 +264,7 @@ class ParserStepModel(Model): def get_token_ids(self, states): cdef StateClass state - cdef int n_tokens = self.nr_feature + cdef int n_tokens = self.state2vec.nF cdef np.ndarray ids = numpy.zeros((len(states), n_tokens), dtype='i', order='C') c_ids = ids.data @@ -263,7 +310,7 @@ cdef class precompute_hiddens: we can do all our hard maths up front, packed into large multiplications, and do the hard-to-program parsing on the CPU. """ - cdef int nF, nO, nP + cdef readonly int nF, nO, nP cdef bint _is_synchronized cdef public object ops cdef np.ndarray _features diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx index 5d45e8170..36f5643a1 100644 --- a/spacy/syntax/nn_parser.pyx +++ b/spacy/syntax/nn_parser.pyx @@ -31,7 +31,7 @@ from thinc cimport openblas from ._parser_model cimport resize_activations, predict_states, arg_max_if_valid from ._parser_model cimport WeightsC, ActivationsC, SizesC, cpu_log_loss -from ._parser_model cimport get_c_weights +from ._parser_model cimport get_c_weights, get_c_sizes from ._parser_model import ParserModel from .._ml import zero_init, PrecomputableAffine, Tok2Vec, flatten from .._ml import link_vectors_to_models, create_default_optimizer @@ -43,7 +43,7 @@ from .. import util from .stateclass cimport StateClass from ._state cimport StateC from .transition_system cimport Transition -from . import _beam_utils, nonproj +from . import nonproj cdef class Parser: @@ -182,8 +182,9 @@ cdef class Parser: beam_width = self.cfg.get('beam_width', 1) if beam_density is None: beam_density = self.cfg.get('beam_density', 0.0) - states, tokvecs = self.predict([doc]) - self.set_annotations([doc], states, tensors=tokvecs) + states = self.predict([doc]) + #beam_width=beam_width, beam_density=beam_density) + self.set_annotations([doc], states, tensors=None) return doc def pipe(self, docs, int batch_size=256, int n_threads=2, @@ -217,13 +218,14 @@ cdef class Parser: if isinstance(docs, Doc): docs = [docs] - cdef SizesC sizes cdef vector[StateC*] states cdef StateClass state state_objs = self.moves.init_batch(docs) for state in state_objs: states.push_back(state.c) - cdef WeightsC weights = get_c_weights(self.model) + model = self.model(docs) + cdef WeightsC weights = get_c_weights(model) + cdef SizesC sizes = get_c_sizes(self.model, len(state_objs)) with nogil: self._parseC(&states[0], weights, sizes) @@ -234,6 +236,7 @@ cdef class Parser: cdef int i, j cdef vector[StateC*] unfinished cdef ActivationsC activations + memset(&activations, 0, sizeof(activations)) while sizes.states >= 1: predict_states(&activations, states, &weights, sizes) @@ -248,7 +251,7 @@ cdef class Parser: sizes.states = unfinished.size() unfinished.clear() - def set_annotations(self, docs, states): + def set_annotations(self, docs, states, tensors=None): cdef StateClass state cdef Doc doc for i, (state, doc) in enumerate(zip(states, docs)):