Update default config [ci skip]

Ines Montani 2020-10-01 22:27:37 +02:00
parent 86c3ec9c2b
commit 5762876dcc

@@ -2,7 +2,6 @@
 train = null
 dev = null
 vectors = null
-vocab_data = null
 init_tok2vec = null
 
 [system]
@@ -11,8 +10,13 @@ gpu_allocator = null
 
 [nlp]
 lang = null
+# List of pipeline component names, in order. The names should correspond to
+# components defined in the [components block]
 pipeline = []
+# Components that are loaded but disabled by default
 disabled = []
+# Optional callbacks to modify the nlp object before it's initialized, after
+# it's created and after the pipeline has been set up
 before_creation = null
 after_creation = null
 after_pipeline_creation = null
@@ -20,6 +24,7 @@ after_pipeline_creation = null
 [nlp.tokenizer]
 @tokenizers = "spacy.Tokenizer.v1"
 
+# The pipeline components and their models
 [components]
 
 # Readers for corpora like dev and train.
@@ -38,8 +43,7 @@ max_length = 0
 limit = 0
 # Apply some simply data augmentation, where we replace tokens with variations.
 # This is especially useful for punctuation and case replacement, to help
-# generalize beyond corpora that don't have smart-quotes, or only have smart
-# quotes, etc.
+# generalize beyond corpora that don't/only have smart quotes etc.
 augmenter = null
 
 [corpora.dev]
@@ -53,6 +57,7 @@ gold_preproc = false
 max_length = 0
 # Limitation on number of training examples
 limit = 0
+# Optional callback for data augmentation
 augmenter = null
 
 # Training hyper-parameters and additional features.
@@ -102,17 +107,18 @@ use_averages = false
 eps = 1e-8
 learn_rate = 0.001
 
-# The 'initialize' step is run before training or pretraining. Components and
-# the tokenizer can each define their own arguments via their .initialize
-# methods that are populated by the config. This lets them gather resources like
-# lookup tables and build label sets, construct vocabularies, etc.
+# These settings are used when nlp.initialize() is called (typically before
+# training or pretraining). Components and the tokenizer can each define their
+# own arguments via their initialize methods that are populated by the config.
+# This lets them gather data resources, build label sets etc.
 [initialize]
-vocab_data = ${paths.vocab_data}
-lookups = null
 vectors = ${paths.vectors}
 # Extra resources for transfer-learning or pseudo-rehearsal
 init_tok2vec = ${paths.init_tok2vec}
+# Data and lookups for vocabulary
+vocab_data = null
+lookups = null
 # Arguments passed to the tokenizer's initialize method
 tokenizer = {}
-# Arguments passed to the initialize methods of the components (keyed by component name)
+# Arguments for initialize methods of the components (keyed by component)
 components = {}
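
For context, the optional augmenter callback referenced in [corpora.train] and [corpora.dev] above is filled in with a registered function. A minimal sketch of how a user config might set it, assuming the spacy.orth_variants.v1 augmenter from the spaCy docs; the function name and values are illustrative and not part of this commit:

[corpora.train.augmenter]
# Replace tokens with orthographic variants (quote styles, casing etc.)
@augmenters = "spacy.orth_variants.v1"
# Proportion of examples to augment, and proportion of those to lowercase
# (illustrative values)
level = 0.1
lower = 0.5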
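
Similarly, the [initialize] block can pass arguments through to each component's initialize method via initialize.components. A hedged sketch, assuming a pipeline with a textcat component whose initialize method accepts a labels argument; the component name and label values are hypothetical:

[initialize.components.textcat]
# Hypothetical: provide the label set up front instead of inferring it from the
# training data during initialization
labels = ["POSITIVE", "NEGATIVE"]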