Revert padding in parser

This commit is contained in:
Matthew Honnibal 2017-09-14 16:59:25 +02:00
parent ae3bc72b29
commit 664c5af745
2 changed files with 2 additions and 10 deletions

View File

@@ -119,12 +119,10 @@ cdef cppclass StateC:
# TODO error =/ # TODO error =/
pass pass
for i in range(n): for i in range(n):
# Token vectors should be padded, so that there's a vector for
# missing values at the start.
if ids[i] >= 0: if ids[i] >= 0:
ids[i] += this.offset + 1 ids[i] += this.offset
else: else:
ids[i] = 0 ids[i] = -1
int S(int i) nogil const: int S(int i) nogil const:
if i >= this._s_i: if i >= this._s_i:

View File

@@ -394,7 +394,6 @@ cdef class Parser:
tokvecs = self.model[0].ops.flatten(tokvecses) tokvecs = self.model[0].ops.flatten(tokvecses)
if USE_FINE_TUNE: if USE_FINE_TUNE:
tokvecs = self.model[0].ops.flatten(self.model[0]((docs, tokvecses))) tokvecs = self.model[0].ops.flatten(self.model[0]((docs, tokvecses)))
tokvecs = self._pad_tokvecs(tokvecs)
nr_state = len(docs) nr_state = len(docs)
nr_class = self.moves.n_moves nr_class = self.moves.n_moves
nr_dim = tokvecs.shape[1] nr_dim = tokvecs.shape[1]
@@ -454,7 +453,6 @@ cdef class Parser:
tokvecs = self.model[0].ops.flatten(tokvecses) tokvecs = self.model[0].ops.flatten(tokvecses)
if USE_FINE_TUNE: if USE_FINE_TUNE:
tokvecs = self.model[0].ops.flatten(self.model[0]((docs, tokvecses))) tokvecs = self.model[0].ops.flatten(self.model[0]((docs, tokvecses)))
tokvecs = self._pad_tokvecs(tokvecs)
cuda_stream = get_cuda_stream() cuda_stream = get_cuda_stream()
state2vec, vec2scores = self.get_batch_model(len(docs), tokvecs, state2vec, vec2scores = self.get_batch_model(len(docs), tokvecs,
cuda_stream, 0.0) cuda_stream, 0.0)
@@ -527,7 +525,6 @@ cdef class Parser:
if losses is not None and self.name not in losses: if losses is not None and self.name not in losses:
losses[self.name] = 0. losses[self.name] = 0.
docs, tokvec_lists = docs_tokvecs docs, tokvec_lists = docs_tokvecs
tokvecs = self.model[0].ops.flatten(tokvec_lists)
if isinstance(docs, Doc) and isinstance(golds, GoldParse): if isinstance(docs, Doc) and isinstance(golds, GoldParse):
docs = [docs] docs = [docs]
golds = [golds] golds = [golds]
@@ -535,7 +532,6 @@ cdef class Parser:
tokvecs, bp_my_tokvecs = self.model[0].begin_update(docs_tokvecs, drop=drop) tokvecs, bp_my_tokvecs = self.model[0].begin_update(docs_tokvecs, drop=drop)
tokvecs = self.model[0].ops.flatten(tokvecs) tokvecs = self.model[0].ops.flatten(tokvecs)
tokvecs = self._pad_tokvecs(tokvecs)
cuda_stream = get_cuda_stream() cuda_stream = get_cuda_stream()
@@ -586,7 +582,6 @@ cdef class Parser:
break break
self._make_updates(d_tokvecs, self._make_updates(d_tokvecs,
backprops, sgd, cuda_stream) backprops, sgd, cuda_stream)
d_tokvecs = self._unpad_tokvecs(d_tokvecs)
d_tokvecs = self.model[0].ops.unflatten(d_tokvecs, [len(d) for d in docs]) d_tokvecs = self.model[0].ops.unflatten(d_tokvecs, [len(d) for d in docs])
if USE_FINE_TUNE: if USE_FINE_TUNE:
d_tokvecs = bp_my_tokvecs(d_tokvecs, sgd=sgd) d_tokvecs = bp_my_tokvecs(d_tokvecs, sgd=sgd)
@@ -643,7 +638,6 @@ cdef class Parser:
d_tokvecs = self.model[0].ops.allocate(tokvecs.shape) d_tokvecs = self.model[0].ops.allocate(tokvecs.shape)
self._make_updates(d_tokvecs, backprop_lower, sgd, cuda_stream) self._make_updates(d_tokvecs, backprop_lower, sgd, cuda_stream)
d_tokvecs = self.model[0].ops.unflatten(d_tokvecs, lengths) d_tokvecs = self.model[0].ops.unflatten(d_tokvecs, lengths)
d_tokvecs = self._unpad_tokvecs(d_tokvecs)
if USE_FINE_TUNE: if USE_FINE_TUNE:
d_tokvecs = bp_my_tokvecs(d_tokvecs, sgd=sgd) d_tokvecs = bp_my_tokvecs(d_tokvecs, sgd=sgd)
return d_tokvecs return d_tokvecs