From 664c5af745786312725917cd9a44418777868350 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Thu, 14 Sep 2017 16:59:25 +0200 Subject: [PATCH] Revert padding in parser --- spacy/syntax/_state.pxd | 6 ++---- spacy/syntax/nn_parser.pyx | 6 ------ 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/spacy/syntax/_state.pxd b/spacy/syntax/_state.pxd index 9a08691de..4fb16881a 100644 --- a/spacy/syntax/_state.pxd +++ b/spacy/syntax/_state.pxd @@ -119,12 +119,10 @@ cdef cppclass StateC: # TODO error =/ pass for i in range(n): - # Token vectors should be padded, so that there's a vector for - # missing values at the start. if ids[i] >= 0: - ids[i] += this.offset + 1 + ids[i] += this.offset else: - ids[i] = 0 + ids[i] = -1 int S(int i) nogil const: if i >= this._s_i: diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx index ad6ed280e..3ea17f2fe 100644 --- a/spacy/syntax/nn_parser.pyx +++ b/spacy/syntax/nn_parser.pyx @@ -394,7 +394,6 @@ cdef class Parser: tokvecs = self.model[0].ops.flatten(tokvecses) if USE_FINE_TUNE: tokvecs = self.model[0].ops.flatten(self.model[0]((docs, tokvecses))) - tokvecs = self._pad_tokvecs(tokvecs) nr_state = len(docs) nr_class = self.moves.n_moves nr_dim = tokvecs.shape[1] @@ -454,7 +453,6 @@ cdef class Parser: tokvecs = self.model[0].ops.flatten(tokvecses) if USE_FINE_TUNE: tokvecs = self.model[0].ops.flatten(self.model[0]((docs, tokvecses))) - tokvecs = self._pad_tokvecs(tokvecs) cuda_stream = get_cuda_stream() state2vec, vec2scores = self.get_batch_model(len(docs), tokvecs, cuda_stream, 0.0) @@ -527,7 +525,6 @@ cdef class Parser: if losses is not None and self.name not in losses: losses[self.name] = 0. docs, tokvec_lists = docs_tokvecs - tokvecs = self.model[0].ops.flatten(tokvec_lists) if isinstance(docs, Doc) and isinstance(golds, GoldParse): docs = [docs] golds = [golds] @@ -535,7 +532,6 @@ cdef class Parser: tokvecs, bp_my_tokvecs = self.model[0].begin_update(docs_tokvecs, drop=drop) tokvecs = self.model[0].ops.flatten(tokvecs) - tokvecs = self._pad_tokvecs(tokvecs) cuda_stream = get_cuda_stream() @@ -586,7 +582,6 @@ cdef class Parser: break self._make_updates(d_tokvecs, backprops, sgd, cuda_stream) - d_tokvecs = self._unpad_tokvecs(d_tokvecs) d_tokvecs = self.model[0].ops.unflatten(d_tokvecs, [len(d) for d in docs]) if USE_FINE_TUNE: d_tokvecs = bp_my_tokvecs(d_tokvecs, sgd=sgd) @@ -643,7 +638,6 @@ cdef class Parser: d_tokvecs = self.model[0].ops.allocate(tokvecs.shape) self._make_updates(d_tokvecs, backprop_lower, sgd, cuda_stream) d_tokvecs = self.model[0].ops.unflatten(d_tokvecs, lengths) - d_tokvecs = self._unpad_tokvecs(d_tokvecs) if USE_FINE_TUNE: d_tokvecs = bp_my_tokvecs(d_tokvecs, sgd=sgd) return d_tokvecs