diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx
index 24d0975fe..023714569 100644
--- a/spacy/syntax/nn_parser.pyx
+++ b/spacy/syntax/nn_parser.pyx
@@ -1,7 +1,6 @@
 # cython: infer_types=True
 # cython: cdivision=True
 # cython: boundscheck=False
-# cython: profile=True
 # coding: utf-8
 from __future__ import unicode_literals, print_function
 
@@ -29,6 +28,8 @@ from thinc.neural.ops import CupyOps
 from thinc.neural.util import get_array_module
 from thinc.linalg cimport Vec, VecVec
 
+from thinc.openblas cimport simple_gemm, simple_axpy
+
 from .._ml import zero_init, PrecomputableAffine, Tok2Vec, flatten
 from .._ml import link_vectors_to_models, create_default_optimizer
 from ..compat import json_dumps, copy_array
@@ -171,8 +172,9 @@ cdef void sum_state_features(float* output,
             else:
                 idx = token_ids[f] * F * O + f*O
                 feature = &cached[idx]
-            for i in range(O):
-                output[i] += feature[i]
+            simple_axpy(output, O, feature, 1.)
+            #for i in range(O):
+            #    output[i] += feature[i]
         output += O
         token_ids += F
 
@@ -422,59 +424,89 @@ cdef class Parser:
         cdef int nr_hidden = hidden_weights.shape[0]
         cdef int nr_task = states.size()
         with nogil:
-            for i in range(nr_task):
-                self._parseC(states[i],
-                    feat_weights, bias, hW, hb,
-                    nr_class, nr_hidden, nr_feat, nr_piece)
+            self._parseC(&states[0], nr_task, feat_weights, bias, hW, hb,
+                nr_class, nr_hidden, nr_feat, nr_piece)
         PyErr_CheckSignals()
         tokvecs = self.model[0].ops.unflatten(tokvecs,
                                     [len(doc) for doc in docs])
         return state_objs, tokvecs
 
-    cdef void _parseC(self, StateC* state,
+    cdef void _parseC(self, StateC** states, int nr_task,
             const float* feat_weights, const float* bias,
             const float* hW, const float* hb,
             int nr_class, int nr_hidden, int nr_feat, int nr_piece) nogil:
+        # Greedily parse `nr_task` states in lockstep. Every pass builds one
+        # maxout hidden vector per unfinished state, scores the whole batch
+        # with a single hidden-to-output GEMM, applies the best valid action
+        # to each state, then compacts `states` in place so the states that
+        # are still unfinished occupy its first `nr_todo` slots.
         token_ids = <int*>calloc(nr_feat, sizeof(int))
         is_valid = <int*>calloc(nr_class, sizeof(int))
-        vectors = <float*>calloc(nr_hidden * nr_piece, sizeof(float))
-        scores = <float*>calloc(nr_class, sizeof(float))
-        if not (token_ids and is_valid and vectors and scores):
+        # vectors: one maxed hidden row per state. unmaxed: scratch for one
+        # state's pre-maxout activations. scores: one score row per state.
+        vectors = <float*>calloc(nr_hidden * nr_task, sizeof(float))
+        unmaxed = <float*>calloc(nr_hidden * nr_piece, sizeof(float))
+        scores = <float*>calloc(nr_class*nr_task, sizeof(float))
+        if not (token_ids and is_valid and vectors and unmaxed and scores):
             with gil:
                 PyErr_SetFromErrno(MemoryError)
                 PyErr_CheckSignals()
-        cdef float feature
-        while not state.is_final():
-            state.set_context_tokens(token_ids, nr_feat)
-            memset(vectors, 0, nr_hidden * nr_piece * sizeof(float))
-            memset(scores, 0, nr_class * sizeof(float))
-            sum_state_features(vectors,
-                feat_weights, token_ids, 1, nr_feat, nr_hidden * nr_piece)
-            for i in range(nr_hidden * nr_piece):
-                vectors[i] += bias[i]
-            V = vectors
-            W = hW
-            for i in range(nr_hidden):
-                if nr_piece == 1:
-                    feature = V[0] if V[0] >= 0. else 0.
-                elif nr_piece == 2:
-                    feature = V[0] if V[0] >= V[1] else V[1]
-                else:
-                    feature = Vec.max(V, nr_piece)
-                for j in range(nr_class):
-                    scores[j] += feature * W[j]
-                W += nr_class
-                V += nr_piece
-            for i in range(nr_class):
-                scores[i] += hb[i]
-            self.moves.set_valid(is_valid, state)
-            guess = arg_max_if_valid(scores, is_valid, nr_class)
-            action = self.moves.c[guess]
-            action.do(state, action.label)
-            state.push_hist(guess)
+        cdef int nr_todo
+        cdef int i, j
+        cdef vector[StateC*] unfinished
+        # Only unfinished states may be advanced. Drop states that are already
+        # final before the first pass; the previous per-state loop skipped
+        # them as well.
+        for i in range(nr_task):
+            if not states[i].is_final():
+                unfinished.push_back(states[i])
+        for i in range(unfinished.size()):
+            states[i] = unfinished[i]
+        nr_todo = unfinished.size()
+        unfinished.clear()
+        while nr_todo >= 1:
+            memset(vectors, 0, nr_todo * nr_hidden * sizeof(float))
+            memset(scores, 0, nr_todo * nr_class * sizeof(float))
+            for i in range(nr_todo):
+                state = states[i]
+                state.set_context_tokens(token_ids, nr_feat)
+                memset(unmaxed, 0, nr_hidden * nr_piece * sizeof(float))
+                sum_state_features(unmaxed,
+                    feat_weights, token_ids, 1, nr_feat, nr_hidden * nr_piece)
+                simple_axpy(unmaxed, nr_hidden*nr_piece, bias, 1.0)
+                state_vector = &vectors[i*nr_hidden]
+                # Maxout: keep the piece picked by Vec.arg_max per hidden unit.
+                for j in range(nr_hidden):
+                    index = j * nr_piece
+                    which = Vec.arg_max(&unmaxed[index], nr_piece)
+                    state_vector[j] = unmaxed[index + which]
+            # Compute hidden-to-output
+            # NOTE(review): shapes passed are vectors (nr_todo, nr_hidden) and
+            # hW (nr_hidden, nr_class); confirm the trailing 0, 0 flags.
+            simple_gemm(scores, nr_todo, nr_class,
+                vectors, nr_todo, nr_hidden,
+                hW, nr_hidden, nr_class, 0, 0)
+            # Add bias
+            for i in range(nr_todo):
+                simple_axpy(&scores[i*nr_class], nr_class, hb, 1.0)
+            # Validate actions, argmax, take action.
+            for i in range(nr_todo):
+                state = states[i]
+                self.moves.set_valid(is_valid, state)
+                guess = arg_max_if_valid(&scores[i*nr_class], is_valid, nr_class)
+                action = self.moves.c[guess]
+                action.do(state, action.label)
+                state.push_hist(guess)
+                if not state.is_final():
+                    unfinished.push_back(state)
+            for i in range(unfinished.size()):
+                states[i] = unfinished[i]
+            nr_todo = unfinished.size()
+            unfinished.clear()
         free(token_ids)
         free(is_valid)
         free(vectors)
+        free(unmaxed)
         free(scores)
 
     def beam_parse(self, docs, int beam_width=3, float beam_density=0.001,