diff --git a/spacy/pipeline.pyx b/spacy/pipeline.pyx
index 4cbb666c0..756dbecc1 100644
--- a/spacy/pipeline.pyx
+++ b/spacy/pipeline.pyx
@@ -237,10 +237,9 @@ cdef class NeuralEntityRecognizer(NeuralParser):
 
     nr_feature = 6
 
-    def get_token_ids(self, states):
+    def set_token_ids(self, ids, states):
         cdef StateClass state
         cdef int n_tokens = 6
-        ids = numpy.zeros((len(states), n_tokens), dtype='i', order='c')
         for i, state in enumerate(states):
             ids[i, 0] = state.c.B(0)-1
             ids[i, 1] = state.c.B(0)
@@ -253,7 +252,7 @@ cdef class NeuralEntityRecognizer(NeuralParser):
                 ids[i, j] = -1
             if ids[i, j] != -1:
                 ids[i, j] += state.c.offset
-        return ids
+        ids[i+1:ids.shape[0]] = -1
 
 
 cdef class BeamDependencyParser(BeamParser):
diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx
index 5140a41fd..ff558e20b 100644
--- a/spacy/syntax/nn_parser.pyx
+++ b/spacy/syntax/nn_parser.pyx
@@ -315,7 +315,9 @@ cdef class Parser:
 
         todo = [st for st in states if not st.is_final()]
         while todo:
-            token_ids = self.get_token_ids(todo)
+            token_ids = numpy.zeros((len(todo), self.nr_feature),
+                                    dtype='i', order='C')
+            self.set_token_ids(token_ids, todo)
             vectors = state2vec(token_ids)
             scores = vec2scores(vectors)
             self.transition_batch(todo, scores)
@@ -339,44 +341,53 @@ cdef class Parser:
         todo = [(s, g) for s, g in zip(states, golds) if not s.is_final()]
 
         backprops = []
+        cdef int max_steps = max(len(doc)*3 for doc in docs)
+        # Allocate one buffer for the token_ids and d_vectors
+        # This will make it quicker to copy back to GPU
+        token_ids = numpy.zeros((max_steps, len(todo), self.nr_feature),
+                                dtype='i', order='C')
+        d_vectors = numpy.zeros((max_steps, len(todo), self.model[0].nO),
+                                dtype='f', order='C')
         cdef float loss = 0.
-        while todo:
+        cdef int nr_step = 0
+        while len(todo) >= 4 and nr_step < max_steps:
             states, golds = zip(*todo)
 
-            token_ids = self.get_token_ids(states)
-            vector, bp_vector = state2vec.begin_update(token_ids, drop=drop)
+            self.set_token_ids(token_ids[nr_step], states)
+            length = len(todo)
+            vector, bp_vector = state2vec.begin_update(token_ids[nr_step, :length],
+                                                       drop=drop)
             scores, bp_scores = vec2scores.begin_update(vector, drop=drop)
 
             d_scores = self.get_batch_loss(states, golds, scores)
-            d_vector = bp_scores(d_scores, sgd=sgd)
+            d_vectors[nr_step, :length] = bp_scores(d_scores, sgd=sgd)
 
-            if isinstance(self.model[0].ops, CupyOps) \
-            and not isinstance(token_ids, state2vec.ops.xp.ndarray):
-                # Move token_ids and d_vector to CPU, asynchronously
-                backprops.append((
-                    get_async(cuda_stream, token_ids),
-                    get_async(cuda_stream, d_vector),
-                    bp_vector
-                ))
-            else:
-                backprops.append((token_ids, d_vector, bp_vector))
+            backprops.append((length, bp_vector))
             self.transition_batch(states, scores)
             todo = [st for st in todo if not st[0].is_final()]
-        # Tells CUDA to block, so our async copies complete.
-        if cuda_stream is not None:
-            cuda_stream.synchronize()
+            nr_step += 1
+        d_tokvecs = state2vec.ops.allocate(tokvecs.shape)
+        if type(token_ids) != type(d_tokvecs):
+            token_ids = get_async(cuda_stream, token_ids)
+            d_vectors = get_async(cuda_stream, d_vectors)
+            if cuda_stream is not None:
+                # Tells CUDA to block, so our async copies complete.
+                cuda_stream.synchronize()
         xp = state2vec.ops.xp # Handle for numpy/cupy
-        for token_ids, d_vector, bp_vector in backprops:
+        for i, (length, bp_vector) in enumerate(backprops):
+            d_vector = d_vectors[i, :length]
             d_state_features = bp_vector(d_vector, sgd=sgd)
-            active_feats = token_ids * (token_ids >= 0)
-            active_feats = active_feats.reshape((token_ids.shape[0], token_ids.shape[1], 1))
+            step_token_ids = token_ids[i, :length]
+            active_feats = step_token_ids * (step_token_ids >= 0)
+            active_feats = active_feats.reshape((active_feats.shape[0],
+                                                 active_feats.shape[1], 1))
             if hasattr(xp, 'scatter_add'):
                 xp.scatter_add(d_tokvecs,
-                    token_ids, d_state_features * active_feats)
+                    step_token_ids, d_state_features)
             else:
                 xp.add.at(d_tokvecs,
-                    token_ids, d_state_features * active_feats)
+                    step_token_ids, d_state_features * active_feats)
         return d_tokvecs
 
     def get_batch_model(self, batch_size, tokvecs, stream, dropout):
@@ -387,13 +398,11 @@ cdef class Parser:
 
     nr_feature = 13
 
-    def get_token_ids(self, states):
+    def set_token_ids(self, token_ids, states):
         cdef StateClass state
-        cdef int n_tokens = self.nr_feature
-        ids = numpy.zeros((len(states), n_tokens), dtype='i', order='C')
         for i, state in enumerate(states):
-            state.set_context_tokens(ids[i])
-        return ids
+            state.set_context_tokens(token_ids[i])
+        token_ids[i+1:token_ids.shape[0]] = -1
 
     def transition_batch(self, states, float[:, ::1] scores):
         cdef StateClass state
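
For reference, here is a minimal NumPy sketch of the buffering scheme this patch moves to: one preallocated, C-contiguous buffer for every step's token ids and gradients, a `-1` tail fill for rows past the live states, and a masked scatter-add back into `d_tokvecs`. Only the names `token_ids`, `d_vectors`, `d_tokvecs`, `nr_feature` and `max_steps` mirror the diff; the sizes, the simulated shrinking batch, and the random gradients are invented for illustration.

```python
import numpy

# All sizes here are made up for the example.
nr_feature = 13    # context tokens extracted per state
max_steps = 6      # cap on transition steps (len(doc)*3 in the patch)
batch = 4          # number of parser states in the batch
n_tokens = 50      # total tokens, i.e. rows of d_tokvecs
width = 8          # gradient width per feature (the model's nO)

# One C-contiguous buffer per quantity, allocated once up front. A single
# big array can be moved CPU<->GPU in one asynchronous copy, instead of a
# small transfer on every parsing step.
token_ids = numpy.zeros((max_steps, batch, nr_feature), dtype='i', order='C')
d_vectors = numpy.zeros((max_steps, batch, width), dtype='f', order='C')

rng = numpy.random.RandomState(0)
lengths = []
for step in range(max_steps):
    length = max(1, batch - step)      # the batch shrinks as states finish
    lengths.append(length)
    token_ids[step, :length] = rng.randint(0, n_tokens, (length, nr_feature))
    token_ids[step, :length, 0] = -1   # some features are missing (-1)
    token_ids[step, length:] = -1      # tail fill, as in set_token_ids
    d_vectors[step, :length] = rng.randn(length, width)

# Backward pass: scatter-add each step's feature gradients into d_tokvecs,
# with a 0/1 mask so the -1 padding never contributes. d_feats stands in
# for the (length, nr_feature, width) output of bp_vector.
d_tokvecs = numpy.zeros((n_tokens, width), dtype='f')
for step, length in enumerate(lengths):
    ids = token_ids[step, :length]                     # (length, nr_feature)
    d_feats = numpy.repeat(d_vectors[step, :length, None], nr_feature, axis=1)
    mask = (ids >= 0)[:, :, None]
    # ids * (ids >= 0) clamps the -1 padding to index 0; the mask then
    # zeroes its contribution, so row 0 only receives real gradients.
    numpy.add.at(d_tokvecs, ids * (ids >= 0), d_feats * mask)
```

Keeping the buffers contiguous is what makes the single `get_async` transfer per array possible, and it is why `set_token_ids` can cheaply mark the unused tail with `-1` instead of allocating a fresh array each step. The sketch applies the explicit 0/1 mask in both scatter paths, matching the patch's `xp.add.at` branch.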