From d163115e91aaa6a0f73b05b05bcca9774d76bf7c Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Sat, 7 Oct 2017 21:00:43 -0500
Subject: [PATCH 1/4] Add non-linearity after history features

---
 spacy/_ml.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/spacy/_ml.py b/spacy/_ml.py
index 898d6ab49..23facb9fb 100644
--- a/spacy/_ml.py
+++ b/spacy/_ml.py
@@ -264,7 +264,8 @@ def HistoryFeatures(nr_class, hist_size=8, nr_dim=8):
         return layerize(noop())
     embed_tables = [Embed(nr_dim, nr_class, column=i, name='embed%d')
                     for i in range(hist_size)]
-    embed = concatenate(*embed_tables)
+    embed = chain(concatenate(*embed_tables),
+                  LN(Maxout(hist_size*nr_dim, hist_size*nr_dim)))
     ops = embed.ops
     def add_history_fwd(vectors_hists, drop=0.):
         vectors, hist_ids = vectors_hists

From 9d66a915da3c78346ebf6a47fac54dd5eb94c246 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Sat, 7 Oct 2017 21:02:38 -0500
Subject: [PATCH 2/4] Update training defaults

---
 spacy/cli/train.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/spacy/cli/train.py b/spacy/cli/train.py
index b27087056..80bb11798 100644
--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@@ -78,11 +78,11 @@ def train(cmd, lang, output_dir, train_data, dev_data, n_iter=10, n_sents=0,
     # starts high and decays sharply, to force the optimizer to explore.
     # Batch size starts at 1 and grows, so that we make updates quickly
     # at the beginning of training.
-    dropout_rates = util.decaying(util.env_opt('dropout_from', 0.2),
-                                  util.env_opt('dropout_to', 0.2),
-                                  util.env_opt('dropout_decay', 0.0))
+    dropout_rates = util.decaying(util.env_opt('dropout_from', 0.6),
+                                  util.env_opt('dropout_to', 0.1),
+                                  util.env_opt('dropout_decay', 1e-5))
     batch_sizes = util.compounding(util.env_opt('batch_from', 1),
-                                   util.env_opt('batch_to', 16),
+                                   util.env_opt('batch_to', 4),
                                    util.env_opt('batch_compound', 1.001))
     corpus = GoldCorpus(train_path, dev_path, limit=n_sents)
     n_train_words = corpus.count_train()

From 42b401d08b5b4b6968d2ed3e70e0a3c580b6c60b Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Sat, 7 Oct 2017 21:05:21 -0500
Subject: [PATCH 3/4] Change default hidden depth to 1

---
 spacy/syntax/nn_parser.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx
index fdcf1d2d1..153f7a484 100644
--- a/spacy/syntax/nn_parser.pyx
+++ b/spacy/syntax/nn_parser.pyx
@@ -239,7 +239,7 @@ cdef class Parser:
     """
     @classmethod
     def Model(cls, nr_class, **cfg):
-        depth = util.env_opt('parser_hidden_depth', cfg.get('hidden_depth', 2))
+        depth = util.env_opt('parser_hidden_depth', cfg.get('hidden_depth', 1))
         token_vector_width = util.env_opt('token_vector_width', cfg.get('token_vector_width', 128))
         hidden_width = util.env_opt('hidden_width', cfg.get('hidden_width', 128))
         parser_maxout_pieces = util.env_opt('parser_maxout_pieces', cfg.get('maxout_pieces', 1))
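Patch 1 above chains a layer-normalized Maxout after the concatenated per-column history embeddings, so the hist_size * nr_dim feature vector passes through a non-linearity before it is mixed into the state representation. A minimal NumPy sketch of that computation follows; the piece count, random initialization, and shapes are illustrative assumptions for intuition, not thinc's internals:

```python
# Sketch of: chain(concatenate(*embed_tables),
#                  LN(Maxout(hist_size*nr_dim, hist_size*nr_dim)))
# All parameters here are randomly initialized stand-ins; thinc owns the
# real parameters and gradients.
import numpy as np

nr_class, hist_size, nr_dim, nr_piece = 20, 8, 8, 2  # nr_piece is assumed
width = hist_size * nr_dim

rng = np.random.RandomState(0)
# One embedding table per history column, mapping an action ID to nr_dim dims.
tables = [rng.normal(size=(nr_class, nr_dim)) for _ in range(hist_size)]
W = rng.normal(size=(width, nr_piece, width))  # maxout weights: (nO, nP, nI)
b = np.zeros((width, nr_piece))

def history_features(hist_ids):
    # hist_ids: (batch, hist_size) integer IDs of the most recent actions.
    embedded = np.concatenate(
        [tables[i][hist_ids[:, i]] for i in range(hist_size)], axis=-1)
    pieces = np.einsum('bi,opi->bop', embedded, W) + b  # (batch, nO, nP)
    maxed = pieces.max(axis=-1)                         # maxout non-linearity
    mu = maxed.mean(axis=-1, keepdims=True)             # layer normalization
    sd = maxed.std(axis=-1, keepdims=True)
    return (maxed - mu) / (sd + 1e-6)

feats = history_features(rng.randint(nr_class, size=(4, hist_size)))
print(feats.shape)  # (4, 64)
```

Keeping the input and output widths equal (hist_size*nr_dim on both sides of the Maxout) makes the layer a learned re-mixing of the history features that leaves downstream dimensions unchanged.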
From be4f0b64605b036f06fdd919253b719fdc88b5bb Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Sun, 8 Oct 2017 02:08:12 -0500
Subject: [PATCH 4/4] Update defaults

---
 spacy/cli/train.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/spacy/cli/train.py b/spacy/cli/train.py
index 80bb11798..b27087056 100644
--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@@ -78,11 +78,11 @@ def train(cmd, lang, output_dir, train_data, dev_data, n_iter=10, n_sents=0,
     # starts high and decays sharply, to force the optimizer to explore.
     # Batch size starts at 1 and grows, so that we make updates quickly
     # at the beginning of training.
-    dropout_rates = util.decaying(util.env_opt('dropout_from', 0.6),
-                                  util.env_opt('dropout_to', 0.1),
-                                  util.env_opt('dropout_decay', 1e-5))
+    dropout_rates = util.decaying(util.env_opt('dropout_from', 0.2),
+                                  util.env_opt('dropout_to', 0.2),
+                                  util.env_opt('dropout_decay', 0.0))
     batch_sizes = util.compounding(util.env_opt('batch_from', 1),
-                                   util.env_opt('batch_to', 4),
+                                   util.env_opt('batch_to', 16),
                                    util.env_opt('batch_compound', 1.001))
     corpus = GoldCorpus(train_path, dev_path, limit=n_sents)
     n_train_words = corpus.count_train()
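Patch 4 reverts the defaults tried in patch 2: dropout becomes constant again (from == to, decay 0.0) and the batch size compounds from 1 up to 16 rather than capping at 4. For intuition, simplified sketches of the two schedule generators these values feed; the linear decay and the clipping here are assumptions for illustration, and spacy.util's actual decaying/compounding formulas may differ:

```python
# Illustrative stand-ins for spacy.util.decaying / spacy.util.compounding.
from itertools import islice

def decaying(start, stop, decay):
    # Yield values decaying linearly from `start` toward `stop` (assumes
    # start >= stop); decay == 0.0 gives a constant schedule.
    nr_upd = 0
    while True:
        yield max(start - decay * nr_upd, stop)
        nr_upd += 1

def compounding(start, stop, compound):
    # Yield values growing from `start` toward `stop` by a factor of
    # `compound` per update, clipped at `stop`.
    curr = float(start)
    while True:
        yield min(curr, stop)
        curr *= compound

# With the restored defaults, dropout stays at 0.2 while the batch size
# grows from 1 toward 16 by 0.1% per update:
print(list(islice(decaying(0.2, 0.2, 0.0), 3)))                  # [0.2, 0.2, 0.2]
print([round(x, 3) for x in islice(compounding(1, 16, 1.001), 3)])  # [1.0, 1.001, 1.002]
```

The compounding batch schedule means early updates are cheap and frequent, while later updates average over larger batches for lower-variance gradients.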