Revert padding in parser

2025-07-14 18:22:27 +03:00 · 2017-09-14 16:59:25 +02:00 · 2017-09-14 16:59:25 +02:00 · 664c5af745
commit 664c5af745
parent ae3bc72b29
2 changed files with 2 additions and 10 deletions
--- a/spacy/syntax/_state.pxd
+++ b/spacy/syntax/_state.pxd
@@ -119,12 +119,10 @@ cdef cppclass StateC:
            # TODO error =/
            pass
        for i in range(n):
-            # Token vectors should be padded, so that there's a vector for
-            # missing values at the start.
            if ids[i] >= 0:
-                ids[i] += this.offset + 1
+                ids[i] += this.offset
            else:
-                ids[i] = 0
+                ids[i] = -1
    int S(int i) nogil const:
        if i >= this._s_i:
--- a/spacy/syntax/nn_parser.pyx
+++ b/spacy/syntax/nn_parser.pyx
@@ -394,7 +394,6 @@ cdef class Parser:
        tokvecs = self.model[0].ops.flatten(tokvecses)
        if USE_FINE_TUNE:
            tokvecs = self.model[0].ops.flatten(self.model[0]((docs, tokvecses)))
-        tokvecs = self._pad_tokvecs(tokvecs)
        nr_state = len(docs)
        nr_class = self.moves.n_moves
        nr_dim = tokvecs.shape[1]
@@ -454,7 +453,6 @@ cdef class Parser:
        tokvecs = self.model[0].ops.flatten(tokvecses)
        if USE_FINE_TUNE:
            tokvecs = self.model[0].ops.flatten(self.model[0]((docs, tokvecses)))
-        tokvecs = self._pad_tokvecs(tokvecs)
        cuda_stream = get_cuda_stream()
        state2vec, vec2scores = self.get_batch_model(len(docs), tokvecs,
                                                     cuda_stream, 0.0)
@@ -527,7 +525,6 @@ cdef class Parser:
        if losses is not None and self.name not in losses:
            losses[self.name] = 0.
        docs, tokvec_lists = docs_tokvecs
        tokvecs = self.model[0].ops.flatten(tokvec_lists)
        if isinstance(docs, Doc) and isinstance(golds, GoldParse):
            docs = [docs]
            golds = [golds]
@@ -535,7 +532,6 @@ cdef class Parser:
            tokvecs, bp_my_tokvecs = self.model[0].begin_update(docs_tokvecs, drop=drop)
            tokvecs = self.model[0].ops.flatten(tokvecs)
-        tokvecs = self._pad_tokvecs(tokvecs)
        cuda_stream = get_cuda_stream()
@@ -586,7 +582,6 @@ cdef class Parser:
                break
        self._make_updates(d_tokvecs,
            backprops, sgd, cuda_stream)
-        d_tokvecs = self._unpad_tokvecs(d_tokvecs)
        d_tokvecs = self.model[0].ops.unflatten(d_tokvecs, [len(d) for d in docs])
        if USE_FINE_TUNE:
            d_tokvecs = bp_my_tokvecs(d_tokvecs, sgd=sgd)
@@ -643,7 +638,6 @@ cdef class Parser:
        d_tokvecs = self.model[0].ops.allocate(tokvecs.shape)
        self._make_updates(d_tokvecs, backprop_lower, sgd, cuda_stream)
        d_tokvecs = self.model[0].ops.unflatten(d_tokvecs, lengths)
-        d_tokvecs = self._unpad_tokvecs(d_tokvecs)
        if USE_FINE_TUNE:
            d_tokvecs = bp_my_tokvecs(d_tokvecs, sgd=sgd)
        return d_tokvecs