diff --git a/examples/experiments/onto-ner.cfg b/examples/experiments/onto-ner.cfg
index 8970bb3c0..228289128 100644
--- a/examples/experiments/onto-ner.cfg
+++ b/examples/experiments/onto-ner.cfg
@@ -13,24 +13,25 @@ dropout = 0.1
 # Controls early-stopping. 0 or -1 mean unlimited.
 patience = 100000
 max_epochs = 0
-max_steps = 100000
-eval_frequency = 2000
+max_steps = 0
+eval_frequency = 1000
 # Other settings
 seed = 0
-accumulate_gradient = 1
+accumulate_gradient = 2
 use_pytorch_for_gpu_memory = false
 # Control how scores are printed and checkpoints are evaluated.
 scores = ["speed", "ents_p", "ents_r", "ents_f"]
 score_weights = {"ents_f": 1.0}
 # These settings are invalid for the transformer models.
 init_tok2vec = null
-discard_oversize = false
+discard_oversize = true
 omit_extra_lookups = false
+batch_by_words = true
 
 [training.batch_size]
 @schedules = "compounding.v1"
-start = 100
-stop = 2000
+start = 1000
+stop = 1000
 compound = 1.001
 
 [training.optimizer]
@@ -38,7 +39,7 @@ compound = 1.001
 beta1 = 0.9
 beta2 = 0.999
 L2_is_weight_decay = true
-L2 = 0.0
+L2 = 0.01
 grad_clip = 1.0
 use_averages = true
 eps = 1e-8
@@ -64,15 +65,15 @@ min_action_freq = 1
 nr_feature_tokens = 3
 hidden_width = 64
 maxout_pieces = 2
-use_upper = false
+use_upper = true
 
 [nlp.pipeline.ner.model.tok2vec]
 @architectures = "spacy.HashEmbedCNN.v1"
 pretrained_vectors = ${nlp:vectors}
-width = 300
+width = 96
 depth = 4
 window_size = 1
-embed_size = 7000
+embed_size = 2000
 maxout_pieces = 1
 subword_features = true
 dropout = ${training:dropout}
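
Note on the [training.batch_size] change: a compounding schedule multiplies the previous value by `compound` each step and clips the result at `stop`, so moving from start = 100 / stop = 2000 to start = 1000 / stop = 1000 replaces a growing batch size with a constant one. The sketch below is a hypothetical stand-in illustrating that semantics, not spaCy's or Thinc's actual "compounding.v1" implementation:

    # sketch.py -- minimal illustration of compounding-schedule semantics
    # (hypothetical helper; values taken from this diff).
    from typing import Iterator

    def compounding(start: float, stop: float, compound: float) -> Iterator[float]:
        """Yield sizes that grow by a compounding factor, clipped at `stop`."""
        size = start
        while True:
            yield min(size, stop)
            size *= compound

    old = compounding(100, 2000, 1.001)   # previous settings
    new = compounding(1000, 1000, 1.001)  # settings after this diff

    print([round(next(old), 1) for _ in range(3)])  # [100.0, 100.1, 100.2] -- grows toward 2000
    print([round(next(new), 1) for _ in range(3)])  # [1000.0, 1000.0, 1000.0] -- constant

With batch_by_words = true, that constant 1000 is counted in words per batch rather than in examples, and discard_oversize = true drops examples that exceed the budget on their own (hedged reading of the train CLI's batching behavior at the time).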