{# This is a template for training configs used for the quickstart widget in
the docs and the init config command. It encodes various best practices and
can help generate the best possible configuration, given a user's requirements.

Template variables read by the visible template: lang, components, optimize,
hardware, transformer_data and word_vectors.

Derived variables set here:
- use_transformer: truthy only when hardware is not "cpu" and transformer_data
  is available.
- transformer: the transformer_data entry for the chosen optimize target, or an
  empty mapping when no transformer is used.
- listener_components: components that get an explicit section further down;
  any other component is emitted as a bare factory. #}
{%- set use_transformer = hardware != "cpu" and transformer_data -%}
{%- set transformer = transformer_data[optimize] if use_transformer else {} -%}
{%- set listener_components = ["tagger", "morphologizer", "parser", "ner", "textcat", "textcat_multilabel", "entity_linker", "span_finder", "spancat", "spancat_singlelabel", "trainable_lemmatizer"] -%}
{# Data paths. A vectors package is only referenced when no transformer is
used, the config is not optimized for efficiency, and word_vectors is set. #}
[paths]
train = null
dev = null
{% if use_transformer or optimize == "efficiency" or not word_vectors -%}
vectors = null
{% else -%}
vectors = "{{ word_vectors }}"
{% endif -%}
{# System settings: the "pytorch" GPU allocator is only set for transformer
pipelines; otherwise no allocator is configured. #}
[system]
{% if use_transformer -%}
gpu_allocator = "pytorch"
{% else -%}
gpu_allocator = null
{% endif %}
{# Core pipeline settings. full_pipeline is the user's component list, with a
shared "transformer" or "tok2vec" component prepended whenever at least one
component needs token features. #}
[nlp]
lang = "{{ lang }}"
{%- set has_textcat = ("textcat" in components or "textcat_multilabel" in components) -%}
{%- set with_accuracy = optimize == "accuracy" -%}
{# The BOW textcat doesn't need a source of features, so it can omit the
tok2vec/transformer. #}
{%- set with_accuracy_or_transformer = (use_transformer or with_accuracy) -%}
{%- set textcat_needs_features = has_textcat and with_accuracy_or_transformer -%}
{%- if ("tagger" in components or "morphologizer" in components or "parser" in components or "ner" in components or "span_finder" in components or "spancat" in components or "spancat_singlelabel" in components or "trainable_lemmatizer" in components or "entity_linker" in components or textcat_needs_features) -%}
{%- set full_pipeline = ["transformer" if use_transformer else "tok2vec"] + components -%}
{%- else -%}
{%- set full_pipeline = components -%}
{%- endif %}
pipeline = {{ full_pipeline|pprint()|replace("'", '"')|safe }}
batch_size = {{ 128 if hardware == "gpu" else 1000 }}
[components]
{# TRANSFORMER PIPELINE: one shared transformer component whose model name
comes from the selected transformer settings. The component sections in this
branch connect to it through a TransformerListener. #}
{%- if use_transformer -%}
[components.transformer]
factory = "transformer"
[components.transformer.model]
@architectures = "spacy-transformers.TransformerModel.v3"
name = "{{ transformer["name"] }}"
tokenizer_config = {"use_fast": true}
[components.transformer.model.get_spans]
@span_getters = "spacy-transformers.strided_spans.v1"
window = 128
stride = 96
{# Morphologizer (transformer branch): Tagger.v2 model fed by a transformer
listener with mean pooling. #}
{% if "morphologizer" in components %}
[components.morphologizer]
factory = "morphologizer"
[components.morphologizer.model]
@architectures = "spacy.Tagger.v2"
nO = null
[components.morphologizer.model.tok2vec]
@architectures = "spacy-transformers.TransformerListener.v1"
grad_factor = 1.0
[components.morphologizer.model.tok2vec.pooling]
@layers = "reduce_mean.v1"
{%- endif %}
{# Tagger (transformer branch): Tagger.v2 model fed by a transformer listener
with mean pooling. #}
{% if "tagger" in components %}
[components.tagger]
factory = "tagger"
[components.tagger.model]
@architectures = "spacy.Tagger.v2"
nO = null
[components.tagger.model.tok2vec]
@architectures = "spacy-transformers.TransformerListener.v1"
grad_factor = 1.0
[components.tagger.model.tok2vec.pooling]
@layers = "reduce_mean.v1"
{%- endif %}
{# Parser (transformer branch): transition-based parser fed by a transformer
listener with mean pooling. use_upper is false in this branch. #}
{% if "parser" in components -%}
[components.parser]
factory = "parser"
[components.parser.model]
@architectures = "spacy.TransitionBasedParser.v2"
state_type = "parser"
extra_state_tokens = false
hidden_width = 128
maxout_pieces = 3
use_upper = false
nO = null
[components.parser.model.tok2vec]
@architectures = "spacy-transformers.TransformerListener.v1"
grad_factor = 1.0
[components.parser.model.tok2vec.pooling]
@layers = "reduce_mean.v1"
{%- endif %}
{# NER (transformer branch): transition-based model with state_type "ner", fed
by a transformer listener with mean pooling. use_upper is false in this branch. #}
{% if "ner" in components -%}
[components.ner]
factory = "ner"
[components.ner.model]
@architectures = "spacy.TransitionBasedParser.v2"
state_type = "ner"
extra_state_tokens = false
hidden_width = 64
maxout_pieces = 2
use_upper = false
nO = null
[components.ner.model.tok2vec]
@architectures = "spacy-transformers.TransformerListener.v1"
grad_factor = 1.0
[components.ner.model.tok2vec.pooling]
@layers = "reduce_mean.v1"
{% endif -%}
{# Span finder (transformer branch): SpanFinder.v1 with a two-output logistic
scorer, fed by a transformer listener with mean pooling. Spans are stored
under the "sc" key. #}
{% if "span_finder" in components -%}
[components.span_finder]
factory = "span_finder"
max_length = null
min_length = null
scorer = {"@scorers":"spacy.span_finder_scorer.v1"}
spans_key = "sc"
threshold = 0.5
[components.span_finder.model]
@architectures = "spacy.SpanFinder.v1"
[components.span_finder.model.scorer]
@layers = "spacy.LinearLogistic.v1"
nO = 2
[components.span_finder.model.tok2vec]
@architectures = "spacy-transformers.TransformerListener.v1"
grad_factor = 1.0
[components.span_finder.model.tok2vec.pooling]
@layers = "reduce_mean.v1"
{% endif -%}
{# Span categorizer (transformer branch): SpanCategorizer.v1 with a mean/max
reducer and logistic scorer, fed by a transformer listener with mean pooling.
Candidate spans come from an n-gram suggester with sizes 1 to 3. #}
{% if "spancat" in components -%}
[components.spancat]
factory = "spancat"
max_positive = null
scorer = {"@scorers":"spacy.spancat_scorer.v1"}
spans_key = "sc"
threshold = 0.5
[components.spancat.model]
@architectures = "spacy.SpanCategorizer.v1"
[components.spancat.model.reducer]
@layers = "spacy.mean_max_reducer.v1"
hidden_size = 128
[components.spancat.model.scorer]
@layers = "spacy.LinearLogistic.v1"
nO = null
nI = null
[components.spancat.model.tok2vec]
@architectures = "spacy-transformers.TransformerListener.v1"
grad_factor = 1.0
[components.spancat.model.tok2vec.pooling]
@layers = "reduce_mean.v1"
[components.spancat.suggester]
@misc = "spacy.ngram_suggester.v1"
sizes = [1,2,3]
{% endif -%}
{# Single-label span categorizer (transformer branch): SpanCategorizer.v1 with
a Softmax scorer, fed by a transformer listener with mean pooling. Candidate
spans come from an n-gram suggester with sizes 1 to 3. #}
{% if "spancat_singlelabel" in components %}
[components.spancat_singlelabel]
factory = "spancat_singlelabel"
negative_weight = 1.0
allow_overlap = true
scorer = {"@scorers":"spacy.spancat_scorer.v1"}
spans_key = "sc"
[components.spancat_singlelabel.model]
@architectures = "spacy.SpanCategorizer.v1"
[components.spancat_singlelabel.model.reducer]
@layers = "spacy.mean_max_reducer.v1"
hidden_size = 128
[components.spancat_singlelabel.model.scorer]
@layers = "Softmax.v2"
[components.spancat_singlelabel.model.tok2vec]
@architectures = "spacy-transformers.TransformerListener.v1"
grad_factor = 1.0
[components.spancat_singlelabel.model.tok2vec.pooling]
@layers = "reduce_mean.v1"
[components.spancat_singlelabel.suggester]
@misc = "spacy.ngram_suggester.v1"
sizes = [1,2,3]
{% endif %}
{# Trainable lemmatizer (transformer branch): Tagger.v2 model with
normalize = false, fed by a transformer listener with mean pooling. #}
{% if "trainable_lemmatizer" in components -%}
[components.trainable_lemmatizer]
factory = "trainable_lemmatizer"
backoff = "orth"
min_tree_freq = 3
overwrite = false
scorer = {"@scorers":"spacy.lemmatizer_scorer.v1"}
top_k = 1
[components.trainable_lemmatizer.model]
@architectures = "spacy.Tagger.v2"
nO = null
normalize = false
[components.trainable_lemmatizer.model.tok2vec]
@architectures = "spacy-transformers.TransformerListener.v1"
grad_factor = 1.0
[components.trainable_lemmatizer.model.tok2vec.pooling]
@layers = "reduce_mean.v1"
{% endif -%}
{# Entity linker (transformer branch): EntityLinker.v2 model fed by a
transformer listener with mean pooling. Both context and prior information are
enabled. #}
{% if "entity_linker" in components -%}
[components.entity_linker]
factory = "entity_linker"
get_candidates = {"@misc":"spacy.CandidateGenerator.v1"}
incl_context = true
incl_prior = true
[components.entity_linker.model]
@architectures = "spacy.EntityLinker.v2"
nO = null
[components.entity_linker.model.tok2vec]
@architectures = "spacy-transformers.TransformerListener.v1"
grad_factor = 1.0
[components.entity_linker.model.tok2vec.pooling]
@layers = "reduce_mean.v1"
{% endif -%}
{# Text categorizer with exclusive classes (transformer branch). For
"accuracy" an ensemble of a transformer listener and a unigram BOW linear
model is used; otherwise TextCatCNN.v2 on top of the transformer listener. #}
{% if "textcat" in components %}
[components.textcat]
factory = "textcat"
{% if optimize == "accuracy" %}
[components.textcat.model]
@architectures = "spacy.TextCatEnsemble.v2"
nO = null
[components.textcat.model.tok2vec]
@architectures = "spacy-transformers.TransformerListener.v1"
grad_factor = 1.0
[components.textcat.model.tok2vec.pooling]
@layers = "reduce_mean.v1"
[components.textcat.model.linear_model]
@architectures = "spacy.TextCatBOW.v2"
exclusive_classes = true
ngram_size = 1
no_output_layer = false
{% else -%}
[components.textcat.model]
@architectures = "spacy.TextCatCNN.v2"
exclusive_classes = true
nO = null
[components.textcat.model.tok2vec]
@architectures = "spacy-transformers.TransformerListener.v1"
grad_factor = 1.0
[components.textcat.model.tok2vec.pooling]
@layers = "reduce_mean.v1"
{%- endif %}
{%- endif %}
{# Multi-label text categorizer (transformer branch): same layout as the
exclusive textcat, but with exclusive_classes = false. For "accuracy" an
ensemble with a unigram BOW linear model is used; otherwise TextCatCNN.v2. #}
{% if "textcat_multilabel" in components %}
[components.textcat_multilabel]
factory = "textcat_multilabel"
{% if optimize == "accuracy" %}
[components.textcat_multilabel.model]
@architectures = "spacy.TextCatEnsemble.v2"
nO = null
[components.textcat_multilabel.model.tok2vec]
@architectures = "spacy-transformers.TransformerListener.v1"
grad_factor = 1.0
[components.textcat_multilabel.model.tok2vec.pooling]
@layers = "reduce_mean.v1"
[components.textcat_multilabel.model.linear_model]
@architectures = "spacy.TextCatBOW.v2"
exclusive_classes = false
ngram_size = 1
no_output_layer = false
{% else -%}
[components.textcat_multilabel.model]
@architectures = "spacy.TextCatCNN.v2"
exclusive_classes = false
nO = null
[components.textcat_multilabel.model.tok2vec]
@architectures = "spacy-transformers.TransformerListener.v1"
grad_factor = 1.0
[components.textcat_multilabel.model.tok2vec.pooling]
@layers = "reduce_mean.v1"
{%- endif %}
{%- endif %}
{# NON-TRANSFORMER PIPELINE: a shared tok2vec component (MultiHashEmbed +
MaxoutWindowEncoder), emitted only when "tok2vec" is part of full_pipeline.
The encoder is narrower and shallower for "efficiency" (96 x 4) than otherwise
(256 x 8); the embedding width is interpolated from the encoder width.
NOTE(review): include_static_vectors depends only on optimize, while the
vectors path in [paths] is null whenever word_vectors is unset. Confirm that
vectors are supplied separately in that case. #}
{% else -%}
{% if "tok2vec" in full_pipeline -%}
[components.tok2vec]
factory = "tok2vec"
[components.tok2vec.model]
@architectures = "spacy.Tok2Vec.v2"
[components.tok2vec.model.embed]
@architectures = "spacy.MultiHashEmbed.v2"
width = ${components.tok2vec.model.encode.width}
attrs = ["NORM", "PREFIX", "SUFFIX", "SHAPE"]
rows = [5000, 1000, 2500, 2500]
include_static_vectors = {{ "true" if optimize == "accuracy" else "false" }}
[components.tok2vec.model.encode]
@architectures = "spacy.MaxoutWindowEncoder.v2"
width = {{ 96 if optimize == "efficiency" else 256 }}
depth = {{ 4 if optimize == "efficiency" else 8 }}
window_size = 1
maxout_pieces = 3
{% endif -%}
{# Morphologizer (non-transformer branch): Tagger.v2 model listening to the
shared tok2vec; the listener width is interpolated from the tok2vec encoder.
Label smoothing is set to 0.05 in this branch. #}
{% if "morphologizer" in components %}
[components.morphologizer]
factory = "morphologizer"
label_smoothing = 0.05
[components.morphologizer.model]
@architectures = "spacy.Tagger.v2"
nO = null
[components.morphologizer.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
width = ${components.tok2vec.model.encode.width}
{%- endif %}
{# Tagger (non-transformer branch): Tagger.v2 model listening to the shared
tok2vec; the listener width is interpolated from the tok2vec encoder.
Label smoothing is set to 0.05 in this branch. #}
{% if "tagger" in components %}
[components.tagger]
factory = "tagger"
label_smoothing = 0.05
[components.tagger.model]
@architectures = "spacy.Tagger.v2"
nO = null
[components.tagger.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
width = ${components.tok2vec.model.encode.width}
{%- endif %}
{# Parser (non-transformer branch): transition-based parser listening to the
shared tok2vec. use_upper is true in this branch. #}
{% if "parser" in components -%}
[components.parser]
factory = "parser"
[components.parser.model]
@architectures = "spacy.TransitionBasedParser.v2"
state_type = "parser"
extra_state_tokens = false
hidden_width = 128
maxout_pieces = 3
use_upper = true
nO = null
[components.parser.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
width = ${components.tok2vec.model.encode.width}
{%- endif %}
{# NER (non-transformer branch): transition-based model with state_type "ner"
listening to the shared tok2vec. use_upper is true in this branch. #}
{% if "ner" in components %}
[components.ner]
factory = "ner"
[components.ner.model]
@architectures = "spacy.TransitionBasedParser.v2"
state_type = "ner"
extra_state_tokens = false
hidden_width = 64
maxout_pieces = 2
use_upper = true
nO = null
[components.ner.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
width = ${components.tok2vec.model.encode.width}
{% endif %}
{# Span finder (non-transformer branch): SpanFinder.v1 with a two-output
logistic scorer, listening to the shared tok2vec. Spans are stored under the
"sc" key. #}
{% if "span_finder" in components %}
[components.span_finder]
factory = "span_finder"
max_length = null
min_length = null
scorer = {"@scorers":"spacy.span_finder_scorer.v1"}
spans_key = "sc"
threshold = 0.5
[components.span_finder.model]
@architectures = "spacy.SpanFinder.v1"
[components.span_finder.model.scorer]
@layers = "spacy.LinearLogistic.v1"
nO = 2
[components.span_finder.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
width = ${components.tok2vec.model.encode.width}
{% endif %}
{# Span categorizer (non-transformer branch): SpanCategorizer.v1 with a
mean/max reducer and logistic scorer, listening to the shared tok2vec.
Candidate spans come from an n-gram suggester with sizes 1 to 3. #}
{% if "spancat" in components %}
[components.spancat]
factory = "spancat"
max_positive = null
scorer = {"@scorers":"spacy.spancat_scorer.v1"}
spans_key = "sc"
threshold = 0.5
[components.spancat.model]
@architectures = "spacy.SpanCategorizer.v1"
[components.spancat.model.reducer]
@layers = "spacy.mean_max_reducer.v1"
hidden_size = 128
[components.spancat.model.scorer]
@layers = "spacy.LinearLogistic.v1"
nO = null
nI = null
[components.spancat.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
width = ${components.tok2vec.model.encode.width}
[components.spancat.suggester]
@misc = "spacy.ngram_suggester.v1"
sizes = [1,2,3]
{% endif %}
{# Single-label span categorizer (non-transformer branch): SpanCategorizer.v1
with a Softmax scorer, listening to the shared tok2vec. Candidate spans come
from an n-gram suggester with sizes 1 to 3. #}
{% if "spancat_singlelabel" in components %}
[components.spancat_singlelabel]
factory = "spancat_singlelabel"
negative_weight = 1.0
allow_overlap = true
scorer = {"@scorers":"spacy.spancat_scorer.v1"}
spans_key = "sc"
[components.spancat_singlelabel.model]
@architectures = "spacy.SpanCategorizer.v1"
[components.spancat_singlelabel.model.reducer]
@layers = "spacy.mean_max_reducer.v1"
hidden_size = 128
[components.spancat_singlelabel.model.scorer]
@layers = "Softmax.v2"
[components.spancat_singlelabel.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
width = ${components.tok2vec.model.encode.width}
[components.spancat_singlelabel.suggester]
@misc = "spacy.ngram_suggester.v1"
sizes = [1,2,3]
{% endif %}
{# Trainable lemmatizer (non-transformer branch): Tagger.v2 model with
normalize = false, listening to the shared tok2vec. #}
{% if "trainable_lemmatizer" in components -%}
[components.trainable_lemmatizer]
factory = "trainable_lemmatizer"
backoff = "orth"
min_tree_freq = 3
overwrite = false
scorer = {"@scorers":"spacy.lemmatizer_scorer.v1"}
top_k = 1
[components.trainable_lemmatizer.model]
@architectures = "spacy.Tagger.v2"
nO = null
normalize = false
[components.trainable_lemmatizer.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
width = ${components.tok2vec.model.encode.width}
{% endif -%}
{# Entity linker (non-transformer branch): EntityLinker.v2 model listening to
the shared tok2vec. Both context and prior information are enabled. #}
{% if "entity_linker" in components -%}
[components.entity_linker]
factory = "entity_linker"
get_candidates = {"@misc":"spacy.CandidateGenerator.v1"}
incl_context = true
incl_prior = true
[components.entity_linker.model]
@architectures = "spacy.EntityLinker.v2"
nO = null
[components.entity_linker.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
width = ${components.tok2vec.model.encode.width}
{% endif %}
{# Text categorizer with exclusive classes (non-transformer branch). For
"accuracy" an ensemble of a tok2vec listener and a unigram BOW linear model is
used; otherwise a plain unigram BOW model, which has no tok2vec sub-model. #}
{% if "textcat" in components %}
[components.textcat]
factory = "textcat"
{% if optimize == "accuracy" %}
[components.textcat.model]
@architectures = "spacy.TextCatEnsemble.v2"
nO = null
[components.textcat.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
width = ${components.tok2vec.model.encode.width}
[components.textcat.model.linear_model]
@architectures = "spacy.TextCatBOW.v2"
exclusive_classes = true
ngram_size = 1
no_output_layer = false
{% else -%}
[components.textcat.model]
@architectures = "spacy.TextCatBOW.v2"
exclusive_classes = true
ngram_size = 1
no_output_layer = false
{%- endif %}
{%- endif %}
{# Multi-label text categorizer (non-transformer branch): same layout as the
exclusive textcat, but with exclusive_classes = false. For "accuracy" an
ensemble with a tok2vec listener is used; otherwise a plain unigram BOW model. #}
{% if "textcat_multilabel" in components %}
[components.textcat_multilabel]
factory = "textcat_multilabel"
{% if optimize == "accuracy" %}
[components.textcat_multilabel.model]
@architectures = "spacy.TextCatEnsemble.v2"
nO = null
[components.textcat_multilabel.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
width = ${components.tok2vec.model.encode.width}
[components.textcat_multilabel.model.linear_model]
@architectures = "spacy.TextCatBOW.v2"
exclusive_classes = false
ngram_size = 1
no_output_layer = false
{% else -%}
[components.textcat_multilabel.model]
@architectures = "spacy.TextCatBOW.v2"
exclusive_classes = false
ngram_size = 1
no_output_layer = false
{%- endif %}
{%- endif %}
{# End of the transformer / non-transformer branch. #}
{% endif %}
{% for pipe in components %}
{% if pipe not in listener_components %}
{# Other components defined by the user (anything not listed in
listener_components): we just assume they're factories #}
[components.{{ pipe }}]
factory = "{{ pipe }}"
{% endif %}
{% endfor %}
{# Training and development corpora: both use the spacy.Corpus.v1 reader and
take their location from the train / dev entries in [paths]. #}
[corpora]
[corpora.train]
@readers = "spacy.Corpus.v1"
path = ${paths.train}
max_length = 0
[corpora.dev]
@readers = "spacy.Corpus.v1"
path = ${paths.dev}
max_length = 0
{# Training settings. Transformer pipelines additionally get gradient
accumulation (from the transformer's size_factor), a warmup-linear learning
rate schedule and padded batching; all other pipelines batch by words with a
compounding batch size and keep the optimizer's learning rate unspecified here. #}
[training]
{% if use_transformer -%}
accumulate_gradient = {{ transformer["size_factor"] }}
{% endif -%}
dev_corpus = "corpora.dev"
train_corpus = "corpora.train"
[training.optimizer]
@optimizers = "Adam.v1"
{% if use_transformer -%}
[training.optimizer.learn_rate]
@schedules = "warmup_linear.v1"
warmup_steps = 250
total_steps = 20000
initial_rate = 5e-5
{% endif %}

{% if use_transformer %}
[training.batcher]
@batchers = "spacy.batch_by_padded.v1"
discard_oversize = true
size = 2000
buffer = 256
{%- else %}
[training.batcher]
@batchers = "spacy.batch_by_words.v1"
discard_oversize = false
tolerance = 0.2
[training.batcher.size]
@schedules = "compounding.v1"
start = 100
stop = 1000
compound = 1.001
{% endif %}
{# Initialization reuses the vectors entry configured in [paths]. #}
[initialize]
vectors = ${paths.vectors}