From 16ba6aa8a66b69eeeef482dc3247bc46e938aec7 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Fri, 6 Oct 2017 13:17:31 -0500 Subject: [PATCH] Fix parser config serialization --- spacy/syntax/nn_parser.pyx | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx index f9c8c0c14..9ae53b103 100644 --- a/spacy/syntax/nn_parser.pyx +++ b/spacy/syntax/nn_parser.pyx @@ -238,14 +238,15 @@ cdef class Parser: Base class of the DependencyParser and EntityRecognizer. """ @classmethod - def Model(cls, nr_class, token_vector_width=128, hidden_width=200, depth=1, **cfg): - depth = util.env_opt('parser_hidden_depth', depth) - token_vector_width = util.env_opt('token_vector_width', token_vector_width) - hidden_width = util.env_opt('hidden_width', hidden_width) - parser_maxout_pieces = util.env_opt('parser_maxout_pieces', 2) - embed_size = util.env_opt('embed_size', 7000) - hist_size = util.env_opt('history_feats', cfg.get('history_feats', 0)) - hist_width = util.env_opt('history_width', cfg.get('history_width', 0)) + def Model(cls, nr_class, **cfg): + depth = util.env_opt('parser_hidden_depth', cfg.get('parser_hidden_depth', 1)) + token_vector_width = util.env_opt('token_vector_width', cfg.get('token_vector_width', 128)) + hidden_width = util.env_opt('hidden_width', cfg.get('hidden_width', 200)) + parser_maxout_pieces = util.env_opt('parser_maxout_pieces', cfg.get('parser_maxout_pieces', 3)) + embed_size = util.env_opt('embed_size', cfg.get('embed_size', 7000)) + hist_size = util.env_opt('history_feats', cfg.get('hist_size', 0)) + hist_width = util.env_opt('history_width', cfg.get('hist_width', 0)) + print("Create parser model", locals()) if hist_size >= 1 and depth == 0: raise ValueError("Inconsistent hyper-params: " "history_feats >= 1 but parser_hidden_depth==0") @@ -277,14 +278,14 @@ cdef class Parser: upper = chain( HistoryFeatures(nr_class=nr_class, hist_size=hist_size, nr_dim=hist_width), - Maxout(hidden_width, hidden_width+hist_size*hist_width), - clone(Maxout(hidden_width, hidden_width), depth-2), + LayerNorm(Maxout(hidden_width, hidden_width+hist_size*hist_width)), + clone(LayerNorm(Maxout(hidden_width, hidden_width)), depth-2), zero_init(Affine(nr_class, hidden_width, drop_factor=0.0)) ) upper.is_noop = False else: upper = chain( - clone(Maxout(hidden_width, hidden_width), depth-1), + clone(LayerNorm(Maxout(hidden_width, hidden_width)), depth-1), zero_init(Affine(nr_class, hidden_width, drop_factor=0.0)) ) upper.is_noop = False