Fix significant bug in feature calculation -- off by 1

This commit is contained in:
Matthew Honnibal 2017-05-18 06:21:32 -05:00
parent fc8d3a112c
commit a438cef8c5

View File

@@ -121,7 +121,7 @@ class PrecomputableMaxouts(Model):
return Yfp, backward
def Tok2Vec(width, embed_size, preprocess=None):
cols = [LOWER, PREFIX, SUFFIX, SHAPE]
cols = [ID, LOWER, PREFIX, SUFFIX, SHAPE]
with Model.define_operators({'>>': chain, '|': concatenate, '**': clone, '+': add}):
lower = get_col(cols.index(LOWER)) >> HashEmbed(width, embed_size)
prefix = get_col(cols.index(PREFIX)) >> HashEmbed(width, embed_size//2)
@@ -150,7 +150,7 @@ def get_col(idx):
ops = NumpyOps()
else:
ops = CupyOps()
output = ops.xp.ascontiguousarray(X[:, idx])
output = ops.xp.ascontiguousarray(X[:, idx], dtype=X.dtype)
def backward(y, sgd=None):
dX = ops.allocate(X.shape)
dX[:, idx] += y
@@ -176,6 +176,7 @@ def doc2feats(cols=None):
doc.to_array(cols),
dtype='uint64')
feats.append(doc.user_data['cached_feats'])
assert feats[-1].dtype == 'uint64'
return feats, None
model = layerize(forward)
model.cols = cols