Update onto-json default

This commit is contained in:
Matthew Honnibal 2020-06-24 00:00:59 +02:00
parent 306a591e1f
commit 5120113f39

View File

@@ -5,7 +5,7 @@
# data is passed in sentence-by-sentence via some prior preprocessing.
gold_preproc = false
# Limitations on training document length or number of examples.
max_length = 0
max_length = 5000
limit = 0
# Data augmentation
orth_variant_level = 0.0
@@ -14,7 +14,7 @@ dropout = 0.1
patience = 1600
max_epochs = 0
max_steps = 20000
eval_frequency = 400
eval_frequency = 50
# Other settings
seed = 0
accumulate_gradient = 1
@@ -57,8 +57,6 @@ vectors = null
[nlp.pipeline.tok2vec]
factory = "tok2vec"
[nlp.pipeline.senter]
factory = "senter"
[nlp.pipeline.ner]
factory = "ner"
@@ -73,17 +71,10 @@ factory = "tagger"
[nlp.pipeline.parser]
factory = "parser"
learn_tokens = false
min_action_freq = 1
min_action_freq = 30
beam_width = 1
beam_update_prob = 1.0
[nlp.pipeline.senter.model]
@architectures = "spacy.Tagger.v1"
[nlp.pipeline.senter.model.tok2vec]
@architectures = "spacy.Tok2VecTensors.v1"
width = ${nlp.pipeline.tok2vec.model:width}
[nlp.pipeline.tagger.model]
@architectures = "spacy.Tagger.v1"
@@ -116,10 +107,10 @@ width = ${nlp.pipeline.tok2vec.model:width}
[nlp.pipeline.tok2vec.model]
@architectures = "spacy.HashEmbedCNN.v1"
pretrained_vectors = ${nlp:vectors}
width = 256
depth = 6
width = 128
depth = 4
window_size = 1
embed_size = 10000
embed_size = 7000
maxout_pieces = 3
subword_features = true
dropout = null
dropout = ${training:dropout}