diff --git a/examples/experiments/onto-ner.cfg b/examples/experiments/onto-ner.cfg
index 228289128..eab68a27f 100644
--- a/examples/experiments/onto-ner.cfg
+++ b/examples/experiments/onto-ner.cfg
@@ -5,7 +5,7 @@
 # data is passed in sentence-by-sentence via some prior preprocessing.
 gold_preproc = false
 # Limitations on training document length or number of examples.
-max_length = 5000
+max_length = 3000
 limit = 0
 # Data augmentation
 orth_variant_level = 0.0
@@ -17,20 +17,20 @@ max_steps = 0
 eval_frequency = 1000
 # Other settings
 seed = 0
-accumulate_gradient = 2
+accumulate_gradient = 1
 use_pytorch_for_gpu_memory = false
 # Control how scores are printed and checkpoints are evaluated.
 scores = ["speed", "ents_p", "ents_r", "ents_f"]
 score_weights = {"ents_f": 1.0}
 # These settings are invalid for the transformer models.
 init_tok2vec = null
-discard_oversize = true
+discard_oversize = false
 omit_extra_lookups = false
-batch_by_words = true
+batch_by = "words"

 [training.batch_size]
 @schedules = "compounding.v1"
-start = 1000
+start = 100
 stop = 1000
 compound = 1.001
@@ -45,12 +45,6 @@ use_averages = true
 eps = 1e-8
 learn_rate = 0.001

-#[training.optimizer.learn_rate]
-#@schedules = "warmup_linear.v1"
-#warmup_steps = 1000
-#total_steps = 50000
-#initial_rate = 0.003
-
 [nlp]
 lang = "en"
 vectors = null
@@ -74,6 +68,6 @@ width = 96
 depth = 4
 window_size = 1
 embed_size = 2000
-maxout_pieces = 1
+maxout_pieces = 3
 subword_features = true
 dropout = ${training:dropout}
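
For reference, here is a minimal Python sketch of what the [training.batch_size] change does, assuming (as in Thinc's compounding schedule) that "compounding.v1" starts at `start`, multiplies the value by `compound` after every batch, and clips it at `stop`. This is an illustration of the schedule's shape, not spaCy's actual implementation:

    def compounding(start: float, stop: float, compound: float):
        """Yield batch sizes growing geometrically from `start` toward `stop`.

        Each value is the previous one multiplied by `compound`, clipped so
        it never exceeds `stop`.
        """
        curr = start
        while True:
            yield curr
            curr = min(curr * compound, stop)

    # With the new settings (start = 100, stop = 1000, compound = 1.001),
    # the batch size grows from 100 and hits the cap of 1000 after roughly
    # 2,300 batches, since 100 * 1.001**n >= 1000 at n ~ ln(10)/ln(1.001) ~ 2304.
    sizes = compounding(100, 1000, 1.001)
    print(next(sizes))  # -> 100

Lowering `start` from 1000 to 100 therefore makes the earliest updates much smaller, which pairs with dropping `accumulate_gradient` from 2 to 1. Note also that `batch_by = "words"` means these sizes are word counts rather than document counts, and that with `discard_oversize = false` a document longer than the current batch size is, as I understand the word-batching behavior, placed in a batch of its own rather than dropped.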