# spaCy/spacy/default_config_pretraining.cfg
# Default configuration values for pretraining.
# Filesystem locations that other sections pull in via ${paths.*} interpolation.
[paths]
# No default is provided. [corpora.pretrain] reads this value as its `path`
# through ${paths.raw_text}, so it presumably has to be supplied by the user
# before pretraining can run -- confirm against the spaCy docs.
raw_text = null
# Top-level pretraining settings. The batcher, objective and optimizer are
# configured separately in the [pretraining.*] subsections of this file.
[pretraining]
max_epochs = 1000
dropout = 0.2
# NOTE(review): null presumably means "no extra step-based checkpoints" --
# confirm against the spaCy pretraining docs.
n_save_every = null
# NOTE(review): looks like the name of the pipeline component to pretrain -- confirm.
component = "tok2vec"
# Empty by default. NOTE(review): presumably selects a sub-layer of the
# component when non-empty -- confirm.
layer = ""
# Dotted name matching the [corpora.pretrain] section defined in this file.
corpus = "corpora.pretrain"
# How pretraining examples are grouped into batches.
# The "@batchers" key follows the spaCy/Thinc registry convention: it names a
# registered function, and the remaining keys in the section are presumably
# passed to it as arguments -- confirm against the function's documentation.
[pretraining.batcher]
@batchers = "spacy.batch_by_words.v1"
# NOTE(review): given the batcher name, this is presumably a word count per
# batch rather than a document count -- confirm.
size = 3000
discard_oversize = false
tolerance = 0.2
# null leaves the length function unset here; presumably the batcher falls
# back to its own default -- TODO confirm.
get_length = null
# Selects the pretraining objective and its settings.
[pretraining.objective]
type = "characters"
# NOTE(review): presumably the number of characters used by the "characters"
# objective above -- confirm the exact meaning in the spaCy docs.
n_characters = 4
# Optimizer used during pretraining.
# "@optimizers" names a registered optimizer factory (spaCy/Thinc registry
# convention); the remaining keys are presumably its keyword arguments --
# confirm against the Adam.v1 documentation.
[pretraining.optimizer]
@optimizers = "Adam.v1"
beta1 = 0.9
beta2 = 0.999
# NOTE(review): presumably switches the L2 term below from a gradient penalty
# to decoupled weight decay -- confirm.
L2_is_weight_decay = true
L2 = 0.01
grad_clip = 1.0
use_averages = true
eps = 1e-8
learn_rate = 0.001
# Corpus definitions. The parent section carries no keys of its own; it is
# kept as the container for the named corpora below.
[corpora]
# Corpus referenced by `corpus = "corpora.pretrain"` in [pretraining].
# "@readers" names a registered reader function (spaCy/Thinc registry
# convention); the remaining keys are presumably its arguments -- confirm
# against the reader's documentation.
[corpora.pretrain]
@readers = "spacy.JsonlReader.v1"
# Interpolated from [paths]; raw_text is null by default in this file.
path = ${paths.raw_text}
# NOTE(review): presumably length bounds for the texts that are kept -- confirm
# the unit and the meaning of 0 for `limit` in the reader's documentation.
min_length = 5
max_length = 500
limit = 0