Make parser_maxout_pieces hyper-param work

This commit is contained in:
Matthew Honnibal 2017-10-19 13:45:18 +02:00
parent 03a215c5fd
commit b54b4b8a97

View File

@ -153,7 +153,7 @@ cdef class precompute_hiddens:
if bp_nonlinearity is not None: if bp_nonlinearity is not None:
d_state_vector = bp_nonlinearity(d_state_vector, sgd) d_state_vector = bp_nonlinearity(d_state_vector, sgd)
# This will usually be on GPU # This will usually be on GPU
if isinstance(d_state_vector, numpy.ndarray): if not isinstance(d_state_vector, self.ops.xp.ndarray):
d_state_vector = self.ops.xp.array(d_state_vector) d_state_vector = self.ops.xp.array(d_state_vector)
d_tokens = bp_hiddens((d_state_vector, token_ids), sgd) d_tokens = bp_hiddens((d_state_vector, token_ids), sgd)
return d_tokens return d_tokens
@ -244,8 +244,8 @@ cdef class Parser:
if depth != 1: if depth != 1:
raise ValueError("Currently parser depth is hard-coded to 1.") raise ValueError("Currently parser depth is hard-coded to 1.")
parser_maxout_pieces = util.env_opt('parser_maxout_pieces', cfg.get('maxout_pieces', 2)) parser_maxout_pieces = util.env_opt('parser_maxout_pieces', cfg.get('maxout_pieces', 2))
if parser_maxout_pieces != 2: #if parser_maxout_pieces != 2:
raise ValueError("Currently parser_maxout_pieces is hard-coded to 2") # raise ValueError("Currently parser_maxout_pieces is hard-coded to 2")
token_vector_width = util.env_opt('token_vector_width', cfg.get('token_vector_width', 128)) token_vector_width = util.env_opt('token_vector_width', cfg.get('token_vector_width', 128))
hidden_width = util.env_opt('hidden_width', cfg.get('hidden_width', 200)) hidden_width = util.env_opt('hidden_width', cfg.get('hidden_width', 200))
embed_size = util.env_opt('embed_size', cfg.get('embed_size', 7000)) embed_size = util.env_opt('embed_size', cfg.get('embed_size', 7000))
@ -258,9 +258,13 @@ cdef class Parser:
tok2vec = Tok2Vec(token_vector_width, embed_size, tok2vec = Tok2Vec(token_vector_width, embed_size,
pretrained_dims=cfg.get('pretrained_dims', 0)) pretrained_dims=cfg.get('pretrained_dims', 0))
tok2vec = chain(tok2vec, flatten) tok2vec = chain(tok2vec, flatten)
lower = PrecomputableMaxouts(hidden_width if depth >= 1 else nr_class, if parser_maxout_pieces >= 2:
nF=cls.nr_feature, nP=parser_maxout_pieces, lower = PrecomputableMaxouts(hidden_width if depth >= 1 else nr_class,
nI=token_vector_width) nF=cls.nr_feature, nP=parser_maxout_pieces,
nI=token_vector_width)
else:
lower = PrecomputableAffine(hidden_width if depth >= 1 else nr_class,
nF=cls.nr_feature, nI=token_vector_width)
with Model.use_device('cpu'): with Model.use_device('cpu'):
upper = chain( upper = chain(
@ -413,7 +417,7 @@ cdef class Parser:
for stcls in state_objs: for stcls in state_objs:
if not stcls.c.is_final(): if not stcls.c.is_final():
states.push_back(stcls.c) states.push_back(stcls.c)
feat_weights = state2vec.get_feat_weights() feat_weights = state2vec.get_feat_weights()
cdef int i cdef int i
cdef np.ndarray hidden_weights = numpy.ascontiguousarray(vec2scores._layers[-1].W.T) cdef np.ndarray hidden_weights = numpy.ascontiguousarray(vec2scores._layers[-1].W.T)
@ -438,7 +442,7 @@ cdef class Parser:
is_valid = <int*>calloc(nr_class, sizeof(int)) is_valid = <int*>calloc(nr_class, sizeof(int))
vectors = <float*>calloc(nr_hidden * nr_piece, sizeof(float)) vectors = <float*>calloc(nr_hidden * nr_piece, sizeof(float))
scores = <float*>calloc(nr_class, sizeof(float)) scores = <float*>calloc(nr_class, sizeof(float))
while not state.is_final(): while not state.is_final():
state.set_context_tokens(token_ids, nr_feat) state.set_context_tokens(token_ids, nr_feat)
memset(vectors, 0, nr_hidden * nr_piece * sizeof(float)) memset(vectors, 0, nr_hidden * nr_piece * sizeof(float))
@ -448,7 +452,12 @@ cdef class Parser:
V = vectors V = vectors
W = hW W = hW
for i in range(nr_hidden): for i in range(nr_hidden):
feature = V[0] if V[0] >= V[1] else V[1] if nr_piece == 1:
feature = V[0]
elif nr_piece == 2:
feature = V[0] if V[0] >= V[1] else V[1]
else:
feature = Vec.max(V, nr_piece)
for j in range(nr_class): for j in range(nr_class):
scores[j] += feature * W[j] scores[j] += feature * W[j]
W += nr_class W += nr_class