Use NORM attribute, not LOWER

This commit is contained in:
Matthew Honnibal 2017-06-03 15:30:16 -05:00
parent 94e063ae2a
commit 8a17b99b1c

View File

@ -13,7 +13,7 @@ from thinc import describe
from thinc.describe import Dimension, Synapses, Biases, Gradient from thinc.describe import Dimension, Synapses, Biases, Gradient
from thinc.neural._classes.affine import _set_dimensions_if_needed from thinc.neural._classes.affine import _set_dimensions_if_needed
from .attrs import ID, LOWER, PREFIX, SUFFIX, SHAPE, TAG, DEP from .attrs import ID, NORM, PREFIX, SUFFIX, SHAPE, TAG, DEP
from .tokens.doc import Doc from .tokens.doc import Doc
import numpy import numpy
@ -131,14 +131,14 @@ class PrecomputableMaxouts(Model):
return Yfp, backward return Yfp, backward
def Tok2Vec(width, embed_size, preprocess=None): def Tok2Vec(width, embed_size, preprocess=None):
cols = [ID, LOWER, PREFIX, SUFFIX, SHAPE] cols = [ID, NORM, PREFIX, SUFFIX, SHAPE]
with Model.define_operators({'>>': chain, '|': concatenate, '**': clone, '+': add}): with Model.define_operators({'>>': chain, '|': concatenate, '**': clone, '+': add}):
lower = get_col(cols.index(LOWER)) >> HashEmbed(width, embed_size, name='embed_lower') norm = get_col(cols.index(NORM)) >> HashEmbed(width, embed_size, name='embed_lower')
prefix = get_col(cols.index(PREFIX)) >> HashEmbed(width, embed_size//2, name='embed_prefix') prefix = get_col(cols.index(PREFIX)) >> HashEmbed(width, embed_size//2, name='embed_prefix')
suffix = get_col(cols.index(SUFFIX)) >> HashEmbed(width, embed_size//2, name='embed_suffix') suffix = get_col(cols.index(SUFFIX)) >> HashEmbed(width, embed_size//2, name='embed_suffix')
shape = get_col(cols.index(SHAPE)) >> HashEmbed(width, embed_size//2, name='embed_shape') shape = get_col(cols.index(SHAPE)) >> HashEmbed(width, embed_size//2, name='embed_shape')
embed = (lower | prefix | suffix | shape ) embed = (norm | prefix | suffix | shape )
tok2vec = ( tok2vec = (
with_flatten( with_flatten(
asarray(Model.ops, dtype='uint64') asarray(Model.ops, dtype='uint64')
@ -148,7 +148,7 @@ def Tok2Vec(width, embed_size, preprocess=None):
>> Residual(ExtractWindow(nW=1) >> Maxout(width, width*3)) >> Residual(ExtractWindow(nW=1) >> Maxout(width, width*3))
>> Residual(ExtractWindow(nW=1) >> Maxout(width, width*3)) >> Residual(ExtractWindow(nW=1) >> Maxout(width, width*3))
>> Residual(ExtractWindow(nW=1) >> Maxout(width, width*3)), >> Residual(ExtractWindow(nW=1) >> Maxout(width, width*3)),
pad=4, ndim=5) pad=4)
) )
if preprocess not in (False, None): if preprocess not in (False, None):
tok2vec = preprocess >> tok2vec tok2vec = preprocess >> tok2vec
@ -243,7 +243,7 @@ def zero_init(model):
def doc2feats(cols=None): def doc2feats(cols=None):
cols = [ID, LOWER, PREFIX, SUFFIX, SHAPE] cols = [ID, NORM, PREFIX, SUFFIX, SHAPE]
def forward(docs, drop=0.): def forward(docs, drop=0.):
feats = [] feats = []
for doc in docs: for doc in docs: