diff --git a/spacy/ml/parser_model.pxd b/spacy/ml/parser_model.pxd
index ca31c1699..883862551 100644
--- a/spacy/ml/parser_model.pxd
+++ b/spacy/ml/parser_model.pxd
@@ -41,10 +41,9 @@ cdef ActivationsC alloc_activations(SizesC n) nogil
 cdef void free_activations(const ActivationsC* A) nogil
 
 cdef void predict_states(CBlas cblas, ActivationsC* A, StateC** states,
-        const WeightsC* W, SizesC n) nogil
-
+                         const WeightsC* W, SizesC n) nogil
+
 cdef int arg_max_if_valid(const weight_t* scores, const int* is_valid, int n) nogil
 
-cdef void cpu_log_loss(float* d_scores,
-        const float* costs, const int* is_valid, const float* scores, int O) nogil
-
+cdef void cpu_log_loss(float* d_scores, const float* costs,
+                       const int* is_valid, const float* scores, int O) nogil
diff --git a/spacy/ml/parser_model.pyx b/spacy/ml/parser_model.pyx
index 90e836f8a..843275f4c 100644
--- a/spacy/ml/parser_model.pyx
+++ b/spacy/ml/parser_model.pyx
@@ -13,7 +13,7 @@ from .. import util
 from ..errors import Errors
 from ..pipeline._parser_internals.stateclass cimport StateClass
 
-from ..typedefs cimport class_t, hash_t, weight_t
+from ..typedefs cimport weight_t
 
 
 cdef WeightsC get_c_weights(model) except *:
@@ -78,31 +78,31 @@ cdef void resize_activations(ActivationsC* A, SizesC n) nogil:
         A._max_size = n.states
     else:
         A.token_ids = <int*>realloc(A.token_ids,
-            n.states * n.feats * sizeof(A.token_ids[0]))
+                                    n.states * n.feats * sizeof(A.token_ids[0]))
         A.scores = <float*>realloc(A.scores,
-            n.states * n.classes * sizeof(A.scores[0]))
+                                   n.states * n.classes * sizeof(A.scores[0]))
         A.unmaxed = <float*>realloc(A.unmaxed,
-            n.states * n.hiddens * n.pieces * sizeof(A.unmaxed[0]))
+                                    n.states * n.hiddens * n.pieces * sizeof(A.unmaxed[0]))
         A.hiddens = <float*>realloc(A.hiddens,
-            n.states * n.hiddens * sizeof(A.hiddens[0]))
+                                    n.states * n.hiddens * sizeof(A.hiddens[0]))
         A.is_valid = <int*>realloc(A.is_valid,
-            n.states * n.classes * sizeof(A.is_valid[0]))
+                                   n.states * n.classes * sizeof(A.is_valid[0]))
         A._max_size = n.states
     A._curr_size = n.states
 
 
 cdef void predict_states(CBlas cblas, ActivationsC* A, StateC** states,
-        const WeightsC* W, SizesC n) nogil:
-    cdef double one = 1.0
+                         const WeightsC* W, SizesC n) nogil:
     resize_activations(A, n)
     for i in range(n.states):
         states[i].set_context_tokens(&A.token_ids[i*n.feats], n.feats)
     memset(A.unmaxed, 0, n.states * n.hiddens * n.pieces * sizeof(float))
     memset(A.hiddens, 0, n.states * n.hiddens * sizeof(float))
-    sum_state_features(cblas, A.unmaxed,
-        W.feat_weights, A.token_ids, n.states, n.feats, n.hiddens * n.pieces)
+    sum_state_features(cblas, A.unmaxed, W.feat_weights, A.token_ids, n.states,
+                       n.feats, n.hiddens * n.pieces)
     for i in range(n.states):
-        saxpy(cblas)(n.hiddens * n.pieces, 1., W.feat_bias, 1, &A.unmaxed[i*n.hiddens*n.pieces], 1)
+        saxpy(cblas)(n.hiddens * n.pieces, 1., W.feat_bias, 1,
+                     &A.unmaxed[i*n.hiddens*n.pieces], 1)
         for j in range(n.hiddens):
             index = i * n.hiddens * n.pieces + j * n.pieces
             which = _arg_max(&A.unmaxed[index], n.pieces)
@@ -112,10 +112,10 @@ cdef void predict_states(CBlas cblas, ActivationsC* A, StateC** states,
         memcpy(A.scores, A.hiddens, n.states * n.classes * sizeof(float))
     else:
         # Compute hidden-to-output
-        sgemm(cblas)(False, True, n.states, n.classes, n.hiddens,
-            1.0, A.hiddens, n.hiddens,
-            W.hidden_weights, n.hiddens,
-            0.0, A.scores, n.classes)
+        sgemm(cblas)(False, True, n.states, n.classes, n.hiddens, 1.0,
+                     A.hiddens, n.hiddens,
+                     W.hidden_weights, n.hiddens, 0.0,
+                     A.scores, n.classes)
         # Add bias
         for i in range(n.states):
             saxpy(cblas)(n.classes, 1., W.hidden_bias, 1, &A.scores[i*n.classes], 1)
@@ -131,9 +131,9 @@ cdef void predict_states(CBlas cblas, ActivationsC* A, StateC** states,
                 A.scores[i*n.classes+j] = min_
 
 
-cdef void sum_state_features(CBlas cblas, float* output,
-        const float* cached, const int* token_ids, int B, int F, int O) nogil:
-    cdef int idx, b, f, i
+cdef void sum_state_features(CBlas cblas, float* output, const float* cached,
+                             const int* token_ids, int B, int F, int O) nogil:
+    cdef int idx, b, f
     cdef const float* feature
     padding = cached
     cached += F * O
@@ -150,9 +150,8 @@ cdef void sum_state_features(CBlas cblas, float* output,
         token_ids += F
 
 
-cdef void cpu_log_loss(float* d_scores,
-        const float* costs, const int* is_valid, const float* scores,
-        int O) nogil:
+cdef void cpu_log_loss(float* d_scores, const float* costs, const int* is_valid,
+                       const float* scores, int O) nogil:
     """Do multi-label log loss"""
     cdef double max_, gmax, Z, gZ
     best = arg_max_if_gold(scores, costs, is_valid, O)
@@ -178,7 +177,7 @@ cdef void cpu_log_loss(float* d_scores,
 
 
 cdef int arg_max_if_gold(const weight_t* scores, const weight_t* costs,
-        const int* is_valid, int n) nogil:
+                         const int* is_valid, int n) nogil:
     # Find minimum cost
     cdef float cost = 1
     for i in range(n):
@@ -202,10 +201,9 @@ cdef int arg_max_if_valid(const weight_t* scores, const int* is_valid, int n) no
     return best
 
 
-
 class ParserStepModel(Model):
     def __init__(self, docs, layers, *, has_upper, unseen_classes=None, train=True,
-            dropout=0.1):
+                 dropout=0.1):
         Model.__init__(self, name="parser_step_model", forward=step_forward)
         self.attrs["has_upper"] = has_upper
         self.attrs["dropout_rate"] = dropout
@@ -267,7 +265,7 @@ class ParserStepModel(Model):
 
     def backprop_step(self, token_ids, d_vector, get_d_tokvecs):
         if isinstance(self.state2vec.ops, CupyOps) \
-        and not isinstance(token_ids, self.state2vec.ops.xp.ndarray):
+                and not isinstance(token_ids, self.state2vec.ops.xp.ndarray):
             # Move token_ids and d_vector to GPU, asynchronously
             self.backprops.append((
                 util.get_async(self.cuda_stream, token_ids),
@@ -277,7 +275,6 @@ class ParserStepModel(Model):
         else:
             self.backprops.append((token_ids, d_vector, get_d_tokvecs))
 
-
     def finish_steps(self, golds):
         # Add a padding vector to the d_tokvecs gradient, so that missing
        # values don't affect the real gradient.
@@ -290,14 +287,15 @@ class ParserStepModel(Model):
             ids = ids.flatten()
             d_state_features = d_state_features.reshape(
                 (ids.size, d_state_features.shape[2]))
-            self.ops.scatter_add(d_tokvecs, ids,
-                d_state_features)
+            self.ops.scatter_add(d_tokvecs, ids, d_state_features)
         # Padded -- see update()
         self.bp_tokvecs(d_tokvecs[:-1])
         return d_tokvecs
 
+
 NUMPY_OPS = NumpyOps()
+
 
 def step_forward(model: ParserStepModel, states, is_train):
     token_ids = model.get_token_ids(states)
     vector, get_d_tokvecs = model.state2vec(token_ids, is_train)
@@ -310,7 +308,7 @@ def step_forward(model: ParserStepModel, states, is_train):
         scores, get_d_vector = model.vec2scores(vector, is_train)
     else:
         scores = NumpyOps().asarray(vector)
-        get_d_vector = lambda d_scores: d_scores
+        def get_d_vector(d_scores): return d_scores
     # If the class is unseen, make sure its score is minimum
     scores[:, model._class_mask == 0] = numpy.nanmin(scores)
 
@@ -445,8 +443,8 @@ cdef class precompute_hiddens:
         feat_weights = self.get_feat_weights()
         cdef int[:, ::1] ids = token_ids
         sum_state_features(cblas, <float*>state_vector.data,
-            feat_weights, &ids[0,0],
-            token_ids.shape[0], self.nF, self.nO*self.nP)
+                           feat_weights, &ids[0, 0], token_ids.shape[0],
+                           self.nF, self.nO*self.nP)
         state_vector += self.bias
         state_vector, bp_nonlinearity = self._nonlinearity(state_vector)
 
@@ -471,7 +469,7 @@ cdef class precompute_hiddens:
 
         def backprop_maxout(d_best):
             return self.ops.backprop_maxout(d_best, mask, self.nP)
-
+
         return state_vector, backprop_maxout
 
     def _relu_nonlinearity(self, state_vector):
@@ -485,7 +483,7 @@ cdef class precompute_hiddens:
         def backprop_relu(d_best):
             d_best *= mask
             return d_best.reshape((d_best.shape + (1,)))
-
+
         return state_vector, backprop_relu
 
 cdef inline int _arg_max(const float* scores, const int n_classes) nogil:
diff --git a/spacy/pipeline/_parser_internals/ner.pyx b/spacy/pipeline/_parser_internals/ner.pyx
index d6ee29397..411e53668 100644
--- a/spacy/pipeline/_parser_internals/ner.pyx
+++ b/spacy/pipeline/_parser_internals/ner.pyx
@@ -156,7 +156,7 @@ cdef class BiluoPushDown(TransitionSystem):
             if token.ent_type:
                 labels.add(token.ent_type_)
         return labels
-
+
     def move_name(self, int move, attr_t label):
         if move == OUT:
             return 'O'
@@ -641,7 +641,7 @@ cdef class Unit:
                 cost += 1
             break
         return cost
-
+
 
 cdef class Out:
     @staticmethod
diff --git a/spacy/pipeline/dep_parser.pyx b/spacy/pipeline/dep_parser.pyx
index 6daf6e7a6..e4767ed2f 100644
--- a/spacy/pipeline/dep_parser.pyx
+++ b/spacy/pipeline/dep_parser.pyx
@@ -127,6 +127,7 @@ def make_parser(
         scorer=scorer,
     )
 
+
 @Language.factory(
     "beam_parser",
     assigns=["token.dep", "token.head", "token.is_sent_start", "doc.sents"],
diff --git a/spacy/pipeline/ner.pyx b/spacy/pipeline/ner.pyx
index f11b16f65..4ce7ec37b 100644
--- a/spacy/pipeline/ner.pyx
+++ b/spacy/pipeline/ner.pyx
@@ -15,7 +15,7 @@ from ._parser_internals.ner cimport BiluoPushDown
 from .transition_parser cimport Parser
 
 from ..language import Language
-from ..scorer import PRFScore, get_ner_prf
+from ..scorer import get_ner_prf
 from ..training import remove_bilu_prefix
 from ..util import registry
 
@@ -105,6 +105,7 @@ def make_ner(
         scorer=scorer,
     )
 
+
 @Language.factory(
     "beam_ner",
     assigns=["doc.ents", "token.ent_iob", "token.ent_type"],
diff --git a/spacy/pipeline/transition_parser.pxd b/spacy/pipeline/transition_parser.pxd
index a48d76b68..7adb82213 100644
--- a/spacy/pipeline/transition_parser.pxd
+++ b/spacy/pipeline/transition_parser.pxd
@@ -15,7 +15,7 @@ cdef class Parser(TrainablePipe):
     cdef object _cpu_ops
 
     cdef void _parseC(self, CBlas cblas, StateC** states,
-            WeightsC weights, SizesC sizes) nogil
+                      WeightsC weights, SizesC sizes) nogil
 
     cdef void c_transition_batch(self, StateC** states, const float* scores,
-            int nr_class, int batch_size) nogil
+                                 int nr_class, int batch_size) nogil
diff --git a/spacy/pipeline/transition_parser.pyx b/spacy/pipeline/transition_parser.pyx
index fb4db2da9..66eb03ee4 100644
--- a/spacy/pipeline/transition_parser.pyx
+++ b/spacy/pipeline/transition_parser.pyx
@@ -9,7 +9,7 @@ from cymem.cymem cimport Pool
 from itertools import islice
 
 from libc.stdlib cimport calloc, free
-from libc.string cimport memcpy, memset
+from libc.string cimport memset
 from libcpp.vector cimport vector
 
 import random
@@ -22,14 +22,13 @@ from thinc.api import (
     NumpyOps,
     Optimizer,
     chain,
-    get_array_module,
     get_ops,
     set_dropout_rate,
     softmax_activation,
     use_ops,
 )
 from thinc.legacy import LegacySequenceCategoricalCrossentropy
-from thinc.types import Floats2d, Ints1d
+from thinc.types import Floats2d
 
 from ..ml.parser_model cimport (
     ActivationsC,
@@ -44,7 +43,6 @@ from ..ml.parser_model cimport (
     predict_states,
 )
 
 from ..tokens.doc cimport Doc
-from ._parser_internals.search cimport Beam
 from ._parser_internals.stateclass cimport StateClass
 from .trainable_pipe import TrainablePipe
@@ -54,11 +52,10 @@ from ._parser_internals cimport _beam_utils
 from ._parser_internals import _beam_utils
 
 from ..tokens.doc cimport Doc
-from ..typedefs cimport weight_t
 from ..vocab cimport Vocab
 from ._parser_internals cimport _beam_utils
 from ._parser_internals.stateclass cimport StateC, StateClass
-from ._parser_internals.transition_system cimport Transition, TransitionSystem
+from ._parser_internals.transition_system cimport Transition
 from .trainable_pipe cimport TrainablePipe
 
 from .. import util
@@ -289,7 +286,7 @@ cdef class Parser(TrainablePipe):
         with use_ops("numpy"):
             teacher_model = chain(teacher_step_model, softmax_activation())
             student_model = chain(student_step_model, softmax_activation())
-
+
         max_moves = self.cfg["update_with_oracle_cut_size"]
         if max_moves >= 1:
             # Chop sequences into lengths of this many words, to make the
@@ -434,8 +431,6 @@ cdef class Parser(TrainablePipe):
         return batch
 
     def beam_parse(self, docs, int beam_width, float drop=0., beam_density=0.):
-        cdef Beam beam
-        cdef Doc doc
         self._ensure_labels_are_added(docs)
         batch = _beam_utils.BeamBatch(
             self.moves,
@@ -456,15 +451,15 @@ cdef class Parser(TrainablePipe):
         return list(batch)
 
     cdef void _parseC(self, CBlas cblas, StateC** states,
-            WeightsC weights, SizesC sizes) nogil:
-        cdef int i, j
+                      WeightsC weights, SizesC sizes) nogil:
+        cdef int i
         cdef vector[StateC*] unfinished
         cdef ActivationsC activations = alloc_activations(sizes)
         while sizes.states >= 1:
             predict_states(cblas, &activations, states, &weights, sizes)
             # Validate actions, argmax, take action.
-            self.c_transition_batch(states,
-                activations.scores, sizes.classes, sizes.states)
+            self.c_transition_batch(states, activations.scores,
+                                    sizes.classes, sizes.states)
             for i in range(sizes.states):
                 if not states[i].is_final():
                     unfinished.push_back(states[i])
@@ -493,7 +488,7 @@ cdef class Parser(TrainablePipe):
         return [state for state in states if not state.c.is_final()]
 
     cdef void c_transition_batch(self, StateC** states, const float* scores,
-            int nr_class, int batch_size) nogil:
+                                 int nr_class, int batch_size) nogil:
         # n_moves should not be zero at this point, but make sure to avoid zero-length mem alloc
         with gil:
             assert self.moves.n_moves > 0, Errors.E924.format(name=self.name)
@@ -551,8 +546,7 @@ cdef class Parser(TrainablePipe):
         if not states:
             return losses
         model, backprop_tok2vec = self.model.begin_update([eg.x for eg in examples])
-
-        all_states = list(states)
+
         states_golds = list(zip(states, golds))
         n_moves = 0
         while states_golds:
@@ -632,8 +626,8 @@ cdef class Parser(TrainablePipe):
         del tutor
         return losses
 
-    def update_beam(self, examples, *, beam_width,
-            drop=0., sgd=None, losses=None, beam_density=0.0):
+    def update_beam(self, examples, *, beam_width, drop=0., sgd=None,
+                    losses=None, beam_density=0.0):
         states, golds, _ = self.moves.init_gold_batch(examples)
         if not states:
             return losses
@@ -664,7 +658,7 @@ cdef class Parser(TrainablePipe):
         is_valid = <int*>mem.alloc(self.moves.n_moves, sizeof(int))
         costs = <float*>mem.alloc(self.moves.n_moves, sizeof(float))
         cdef np.ndarray d_scores = numpy.zeros((len(states), self.moves.n_moves),
-            dtype='f', order='C')
+                                               dtype='f', order='C')
         c_d_scores = <float*>d_scores.data
         unseen_classes = self.model.attrs["unseen_classes"]
         for i, (state, gold) in enumerate(zip(states, golds)):
@@ -674,8 +668,8 @@ cdef class Parser(TrainablePipe):
             for j in range(self.moves.n_moves):
                 if costs[j] <= 0.0 and j in unseen_classes:
                     unseen_classes.remove(j)
-            cpu_log_loss(c_d_scores,
-                costs, is_valid, &scores[i, 0], d_scores.shape[1])
+            cpu_log_loss(c_d_scores, costs, is_valid, &scores[i, 0],
+                         d_scores.shape[1])
             c_d_scores += d_scores.shape[1]
         # Note that we don't normalize this. See comment in update() for why.
         if losses is not None:
@@ -785,10 +779,7 @@ cdef class Parser(TrainablePipe):
         long_doc[:N], and another representing long_doc[N:]. In contrast to
         _init_gold_batch, this version uses a teacher model to generate the
         cut sequences."""
-        cdef:
-            StateClass start_state
-            StateClass state
-            Transition action
+        cdef StateClass state
         all_states = self.moves.init_batch(docs)
         states = []
         to_cut = []
@@ -810,7 +801,6 @@ cdef class Parser(TrainablePipe):
                 length += 1
         return states
 
-
     def _init_gold_batch(self, examples, max_length):
         """Make a square batch, of length equal to the shortest transition
         sequence or a cap. A long
diff --git a/spacy/training/example.pyx b/spacy/training/example.pyx
index e41f9e02e..efca4bcb0 100644
--- a/spacy/training/example.pyx
+++ b/spacy/training/example.pyx
@@ -1,4 +1,3 @@
-import warnings
 from collections.abc import Iterable as IterableInstance
 
 import numpy
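
Reviewer note: the `cpu_log_loss` kernel re-wrapped above only has the terse docstring "Do multi-label log loss", so a reference may help. It writes the gradient of a log loss over valid transitions: a softmax over all valid actions minus a softmax over the gold (minimum-cost) valid actions, with invalid actions zeroed. The NumPy sketch below is purely illustrative; the function name and the array-in/array-out signature are ours, while the real routine fills a raw `float*` buffer under `nogil` and resolves ties via `arg_max_if_gold`.

    import numpy as np

    def cpu_log_loss_ref(scores, costs, is_valid):
        # d_scores = P - G: P is a softmax over all valid transitions,
        # G a softmax over the gold (minimum-cost) valid transitions.
        scores = np.asarray(scores, dtype=np.float64)
        costs = np.asarray(costs, dtype=np.float64)
        valid = np.asarray(is_valid, dtype=bool)
        # Gold transitions: valid ones whose cost equals the minimum valid cost.
        gold = valid & (costs <= costs[valid].min())
        # Numerically stable softmaxes: shift by the max, exp(-inf) == 0.
        p = np.exp(np.where(valid, scores - scores[valid].max(), -np.inf))
        g = np.exp(np.where(gold, scores - scores[gold].max(), -np.inf))
        return (p / p.sum() - g / g.sum()).astype(np.float32)

When every valid action is gold, P == G and the gradient vanishes, which matches the intuition that there is nothing to correct.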
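Similarly for `sum_state_features`, whose signature is re-wrapped in two places: it sums precomputed per-feature rows into each state's hidden buffer, substituting a padding row for missing tokens (id -1). A sketch under assumed array shapes (the name `sum_state_features_ref` and the 3-D cache layout are ours for illustration; the real kernel works on flat `float*` arrays and adds rows with `saxpy`):

    import numpy as np

    def sum_state_features_ref(padded_cache, token_ids):
        # padded_cache: (1 + n_tokens, F, O); row 0 is the padding vector
        # used for missing tokens. token_ids: (B, F) ints, -1 == missing.
        B, F = token_ids.shape
        O = padded_cache.shape[2]
        output = np.zeros((B, O), dtype=padded_cache.dtype)
        for b in range(B):
            for f in range(F):
                row = token_ids[b, f] + 1  # shift so -1 selects the padding row
                output[b] += padded_cache[row, f]
        return output

This is the core of the `precompute_hiddens` trick: the feature-to-hidden multiplication is done once per token up front, so parsing only needs these O(B*F) row additions per step.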