[paths] raw_text = null [pretraining] max_epochs = 1000 dropout = 0.2 n_save_every = null n_save_epoch = null component = "tok2vec" layer = "" corpus = "corpora.pretrain" [pretraining.batcher] @batchers = "spacy.batch_by_words.v1" size = 3000 discard_oversize = false tolerance = 0.2 get_length = null [pretraining.objective] @architectures = "spacy.PretrainCharacters.v1" maxout_pieces = 3 hidden_size = 300 n_characters = 4 [pretraining.optimizer] @optimizers = "Adam.v1" beta1 = 0.9 beta2 = 0.999 L2_is_weight_decay = true L2 = 0.01 grad_clip = 1.0 use_averages = true eps = 1e-8 learn_rate = 0.001 [corpora] [corpora.pretrain] @readers = "spacy.JsonlCorpus.v1" path = ${paths.raw_text} min_length = 5 max_length = 500 limit = 0