{# This is a template for training configs used for the quickstart widget in
the docs and the init config command. It encodes various best practices and
can help generate the best possible configuration, given a user's requirements.

Variables read by this template:
  lang              value written to [nlp] lang
  components        list of pipeline component names requested by the user
  hardware          compared against "cpu" and "gpu"
  optimize          compared against "efficiency" and "accuracy"; also used as
                    the key into transformer_data
  word_vectors      value written to [paths] vectors (falsy means no vectors)
  transformer_data  mapping indexed by optimize; the selected entry supplies
                    "name" and "size_factor" (falsy means no transformer)

A transformer pipeline is generated only when hardware is not "cpu" and
transformer_data is truthy; otherwise the tok2vec pipeline is generated.
The whitespace-control dashes on the tags below shape the rendered output,
so keep them as they are when editing. #}
{%- set use_transformer = hardware != "cpu" and transformer_data -%}
{%- set transformer = transformer_data[optimize] if use_transformer else {} -%}
{%- set listener_components = ["tagger", "morphologizer", "parser", "ner", "textcat", "textcat_multilabel", "entity_linker", "span_finder", "spancat", "spancat_singlelabel", "trainable_lemmatizer"] -%}
[paths]
train = null
dev = null
{% if use_transformer or optimize == "efficiency" or not word_vectors -%}
vectors = null
{% else -%}
vectors = "{{ word_vectors }}"
{% endif -%}

[system]
{% if use_transformer -%}
gpu_allocator = "pytorch"
{% else -%}
gpu_allocator = null
{% endif %}

[nlp]
lang = "{{ lang }}"
{%- set has_textcat = ("textcat" in components or "textcat_multilabel" in components) -%}
{%- set with_accuracy = optimize == "accuracy" -%}
{# The BOW textcat doesn't need a source of features, so it can omit the
tok2vec/transformer. #}
{%- set with_accuracy_or_transformer = (use_transformer or with_accuracy) -%}
{%- set textcat_needs_features = has_textcat and with_accuracy_or_transformer -%}
{%- if ("tagger" in components or "morphologizer" in components or "parser" in components or "ner" in components or "span_finder" in components or "spancat" in components or "spancat_singlelabel" in components or "trainable_lemmatizer" in components or "entity_linker" in components or textcat_needs_features) -%}
{%- set full_pipeline = ["transformer" if use_transformer else "tok2vec"] + components -%}
{%- else -%}
{%- set full_pipeline = components -%}
{%- endif %}
pipeline = {{ full_pipeline|pprint()|replace("'", '"')|safe }}
batch_size = {{ 128 if hardware == "gpu" else 1000 }}

[components]

{# TRANSFORMER PIPELINE #}
{%- if use_transformer -%}
[components.transformer]
factory = "transformer"

[components.transformer.model]
@architectures = "spacy-transformers.TransformerModel.v3"
name = "{{ transformer["name"] }}"
tokenizer_config = {"use_fast": true}

[components.transformer.model.get_spans]
@span_getters = "spacy-transformers.strided_spans.v1"
window = 128
stride = 96

{% if "morphologizer" in components %}
[components.morphologizer]
factory = "morphologizer"

[components.morphologizer.model]
@architectures = "spacy.Tagger.v2"
nO = null

[components.morphologizer.model.tok2vec]
@architectures = "spacy-transformers.TransformerListener.v1"
grad_factor = 1.0

[components.morphologizer.model.tok2vec.pooling]
@layers = "reduce_mean.v1"
{%- endif %}

{% if "tagger" in components %}
[components.tagger]
factory = "tagger"

[components.tagger.model]
@architectures = "spacy.Tagger.v2"
nO = null

[components.tagger.model.tok2vec]
@architectures = "spacy-transformers.TransformerListener.v1"
grad_factor = 1.0

[components.tagger.model.tok2vec.pooling]
@layers = "reduce_mean.v1"
{%- endif %}

{% if "parser" in components -%}
[components.parser]
factory = "parser"

[components.parser.model]
@architectures = "spacy.TransitionBasedParser.v2"
state_type = "parser"
extra_state_tokens = false
hidden_width = 128
maxout_pieces = 3
use_upper = false
nO = null

[components.parser.model.tok2vec]
@architectures = "spacy-transformers.TransformerListener.v1"
grad_factor = 1.0

[components.parser.model.tok2vec.pooling]
@layers = "reduce_mean.v1"
{%- endif %}

{% if "ner" in components -%}
[components.ner]
factory = "ner"

[components.ner.model]
@architectures = "spacy.TransitionBasedParser.v2"
state_type = "ner"
extra_state_tokens = false
hidden_width = 64
maxout_pieces = 2
use_upper = false
nO = null

[components.ner.model.tok2vec]
@architectures = "spacy-transformers.TransformerListener.v1"
grad_factor = 1.0

[components.ner.model.tok2vec.pooling]
@layers = "reduce_mean.v1"
{% endif -%}

{% if "span_finder" in components -%}
[components.span_finder]
factory = "span_finder"
max_length = 25
min_length = null
scorer = {"@scorers":"spacy.span_finder_scorer.v1"}
spans_key = "sc"
threshold = 0.5

[components.span_finder.model]
@architectures = "spacy.SpanFinder.v1"

[components.span_finder.model.scorer]
@layers = "spacy.LinearLogistic.v1"
nO = 2

[components.span_finder.model.tok2vec]
@architectures = "spacy-transformers.TransformerListener.v1"
grad_factor = 1.0

[components.span_finder.model.tok2vec.pooling]
@layers = "reduce_mean.v1"
{% endif -%}

{% if "spancat" in components -%}
[components.spancat]
factory = "spancat"
max_positive = null
scorer = {"@scorers":"spacy.spancat_scorer.v1"}
spans_key = "sc"
threshold = 0.5

[components.spancat.model]
@architectures = "spacy.SpanCategorizer.v1"

[components.spancat.model.reducer]
@layers = "spacy.mean_max_reducer.v1"
hidden_size = 128

[components.spancat.model.scorer]
@layers = "spacy.LinearLogistic.v1"
nO = null
nI = null

[components.spancat.model.tok2vec]
@architectures = "spacy-transformers.TransformerListener.v1"
grad_factor = 1.0

[components.spancat.model.tok2vec.pooling]
@layers = "reduce_mean.v1"

[components.spancat.suggester]
@misc = "spacy.ngram_suggester.v1"
sizes = [1,2,3]
{% endif -%}

{% if "spancat_singlelabel" in components %}
[components.spancat_singlelabel]
factory = "spancat_singlelabel"
negative_weight = 1.0
allow_overlap = true
scorer = {"@scorers":"spacy.spancat_scorer.v1"}
spans_key = "sc"

[components.spancat_singlelabel.model]
@architectures = "spacy.SpanCategorizer.v1"

[components.spancat_singlelabel.model.reducer]
@layers = "spacy.mean_max_reducer.v1"
hidden_size = 128

[components.spancat_singlelabel.model.scorer]
@layers = "Softmax.v2"

[components.spancat_singlelabel.model.tok2vec]
@architectures = "spacy-transformers.TransformerListener.v1"
grad_factor = 1.0

[components.spancat_singlelabel.model.tok2vec.pooling]
@layers = "reduce_mean.v1"

[components.spancat_singlelabel.suggester]
@misc = "spacy.ngram_suggester.v1"
sizes = [1,2,3]
{% endif %}

{% if "trainable_lemmatizer" in components -%}
[components.trainable_lemmatizer]
factory = "trainable_lemmatizer"
backoff = "orth"
min_tree_freq = 3
overwrite = false
scorer = {"@scorers":"spacy.lemmatizer_scorer.v1"}
top_k = 1

[components.trainable_lemmatizer.model]
@architectures = "spacy.Tagger.v2"
nO = null
normalize = false

[components.trainable_lemmatizer.model.tok2vec]
@architectures = "spacy-transformers.TransformerListener.v1"
grad_factor = 1.0

[components.trainable_lemmatizer.model.tok2vec.pooling]
@layers = "reduce_mean.v1"
{% endif -%}

{% if "entity_linker" in components -%}
[components.entity_linker]
factory = "entity_linker"
get_candidates = {"@misc":"spacy.CandidateGenerator.v1"}
incl_context = true
incl_prior = true

[components.entity_linker.model]
@architectures = "spacy.EntityLinker.v2"
nO = null

[components.entity_linker.model.tok2vec]
@architectures = "spacy-transformers.TransformerListener.v1"
grad_factor = 1.0

[components.entity_linker.model.tok2vec.pooling]
@layers = "reduce_mean.v1"
{% endif -%}

{% if "textcat" in components %}
[components.textcat]
factory = "textcat"

{% if optimize == "accuracy" %}
[components.textcat.model]
@architectures = "spacy.TextCatEnsemble.v2"
nO = null

[components.textcat.model.tok2vec]
@architectures = "spacy-transformers.TransformerListener.v1"
grad_factor = 1.0

[components.textcat.model.tok2vec.pooling]
@layers = "reduce_mean.v1"

[components.textcat.model.linear_model]
@architectures = "spacy.TextCatBOW.v3"
exclusive_classes = true
length = 262144
ngram_size = 1
no_output_layer = false

{% else -%}
[components.textcat.model]
@architectures = "spacy.TextCatReduce.v1"
exclusive_classes = true
use_reduce_first = false
use_reduce_max = false
use_reduce_mean = true
nO = null

[components.textcat.model.tok2vec]
@architectures = "spacy-transformers.TransformerListener.v1"
grad_factor = 1.0

[components.textcat.model.tok2vec.pooling]
@layers = "reduce_mean.v1"
{%- endif %}
{%- endif %}

{% if "textcat_multilabel" in components %}
[components.textcat_multilabel]
factory = "textcat_multilabel"

{% if optimize == "accuracy" %}
[components.textcat_multilabel.model]
@architectures = "spacy.TextCatEnsemble.v2"
nO = null

[components.textcat_multilabel.model.tok2vec]
@architectures = "spacy-transformers.TransformerListener.v1"
grad_factor = 1.0

[components.textcat_multilabel.model.tok2vec.pooling]
@layers = "reduce_mean.v1"

[components.textcat_multilabel.model.linear_model]
@architectures = "spacy.TextCatBOW.v3"
exclusive_classes = false
length = 262144
ngram_size = 1
no_output_layer = false

{% else -%}
[components.textcat_multilabel.model]
@architectures = "spacy.TextCatReduce.v1"
exclusive_classes = false
use_reduce_first = false
use_reduce_max = false
use_reduce_mean = true
nO = null

[components.textcat_multilabel.model.tok2vec]
@architectures = "spacy-transformers.TransformerListener.v1"
grad_factor = 1.0

[components.textcat_multilabel.model.tok2vec.pooling]
@layers = "reduce_mean.v1"
{%- endif %}
{%- endif %}

{# NON-TRANSFORMER PIPELINE #}
{% else -%}
{% if "tok2vec" in full_pipeline -%}
[components.tok2vec]
factory = "tok2vec"

[components.tok2vec.model]
@architectures = "spacy.Tok2Vec.v2"

[components.tok2vec.model.embed]
@architectures = "spacy.MultiHashEmbed.v2"
width = ${components.tok2vec.model.encode.width}
attrs = ["NORM", "PREFIX", "SUFFIX", "SHAPE"]
rows = [5000, 1000, 2500, 2500]
include_static_vectors = {{ "true" if optimize == "accuracy" else "false" }}

[components.tok2vec.model.encode]
@architectures = "spacy.MaxoutWindowEncoder.v2"
width = {{ 96 if optimize == "efficiency" else 256 }}
depth = {{ 4 if optimize == "efficiency" else 8 }}
window_size = 1
maxout_pieces = 3
{% endif -%}

{% if "morphologizer" in components %}
[components.morphologizer]
factory = "morphologizer"
label_smoothing = 0.05

[components.morphologizer.model]
@architectures = "spacy.Tagger.v2"
nO = null

[components.morphologizer.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
width = ${components.tok2vec.model.encode.width}
{%- endif %}

{% if "tagger" in components %}
[components.tagger]
factory = "tagger"
label_smoothing = 0.05

[components.tagger.model]
@architectures = "spacy.Tagger.v2"
nO = null

[components.tagger.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
width = ${components.tok2vec.model.encode.width}
{%- endif %}

{% if "parser" in components -%}
[components.parser]
factory = "parser"

[components.parser.model]
@architectures = "spacy.TransitionBasedParser.v2"
state_type = "parser"
extra_state_tokens = false
hidden_width = 128
maxout_pieces = 3
use_upper = true
nO = null

[components.parser.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
width = ${components.tok2vec.model.encode.width}
{%- endif %}

{% if "ner" in components %}
[components.ner]
factory = "ner"

[components.ner.model]
@architectures = "spacy.TransitionBasedParser.v2"
state_type = "ner"
extra_state_tokens = false
hidden_width = 64
maxout_pieces = 2
use_upper = true
nO = null

[components.ner.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
width = ${components.tok2vec.model.encode.width}
{% endif %}

{% if "span_finder" in components %}
[components.span_finder]
factory = "span_finder"
max_length = 25
min_length = null
scorer = {"@scorers":"spacy.span_finder_scorer.v1"}
spans_key = "sc"
threshold = 0.5

[components.span_finder.model]
@architectures = "spacy.SpanFinder.v1"

[components.span_finder.model.scorer]
@layers = "spacy.LinearLogistic.v1"
nO = 2

[components.span_finder.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
width = ${components.tok2vec.model.encode.width}
{% endif %}

{% if "spancat" in components %}
[components.spancat]
factory = "spancat"
max_positive = null
scorer = {"@scorers":"spacy.spancat_scorer.v1"}
spans_key = "sc"
threshold = 0.5

[components.spancat.model]
@architectures = "spacy.SpanCategorizer.v1"

[components.spancat.model.reducer]
@layers = "spacy.mean_max_reducer.v1"
hidden_size = 128

[components.spancat.model.scorer]
@layers = "spacy.LinearLogistic.v1"
nO = null
nI = null

[components.spancat.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
width = ${components.tok2vec.model.encode.width}

[components.spancat.suggester]
@misc = "spacy.ngram_suggester.v1"
sizes = [1,2,3]
{% endif %}

{% if "spancat_singlelabel" in components %}
[components.spancat_singlelabel]
factory = "spancat_singlelabel"
negative_weight = 1.0
allow_overlap = true
scorer = {"@scorers":"spacy.spancat_scorer.v1"}
spans_key = "sc"

[components.spancat_singlelabel.model]
@architectures = "spacy.SpanCategorizer.v1"

[components.spancat_singlelabel.model.reducer]
@layers = "spacy.mean_max_reducer.v1"
hidden_size = 128

[components.spancat_singlelabel.model.scorer]
@layers = "Softmax.v2"

[components.spancat_singlelabel.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
width = ${components.tok2vec.model.encode.width}

[components.spancat_singlelabel.suggester]
@misc = "spacy.ngram_suggester.v1"
sizes = [1,2,3]
{% endif %}

{% if "trainable_lemmatizer" in components -%}
[components.trainable_lemmatizer]
factory = "trainable_lemmatizer"
backoff = "orth"
min_tree_freq = 3
overwrite = false
scorer = {"@scorers":"spacy.lemmatizer_scorer.v1"}
top_k = 1

[components.trainable_lemmatizer.model]
@architectures = "spacy.Tagger.v2"
nO = null
normalize = false

[components.trainable_lemmatizer.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
width = ${components.tok2vec.model.encode.width}
{% endif -%}

{% if "entity_linker" in components -%}
[components.entity_linker]
factory = "entity_linker"
get_candidates = {"@misc":"spacy.CandidateGenerator.v1"}
incl_context = true
incl_prior = true

[components.entity_linker.model]
@architectures = "spacy.EntityLinker.v2"
nO = null

[components.entity_linker.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
width = ${components.tok2vec.model.encode.width}
{% endif %}

{% if "textcat" in components %}
[components.textcat]
factory = "textcat"

{% if optimize == "accuracy" %}
[components.textcat.model]
@architectures = "spacy.TextCatEnsemble.v2"
nO = null

[components.textcat.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
width = ${components.tok2vec.model.encode.width}

[components.textcat.model.linear_model]
@architectures = "spacy.TextCatBOW.v3"
exclusive_classes = true
length = 262144
ngram_size = 1
no_output_layer = false

{% else -%}
{# Standalone BOW model: length is spelled out so this block matches the
other TextCatBOW.v3 blocks in this template. -#}
[components.textcat.model]
@architectures = "spacy.TextCatBOW.v3"
exclusive_classes = true
length = 262144
ngram_size = 1
no_output_layer = false
{%- endif %}
{%- endif %}

{% if "textcat_multilabel" in components %}
[components.textcat_multilabel]
factory = "textcat_multilabel"

{% if optimize == "accuracy" %}
[components.textcat_multilabel.model]
@architectures = "spacy.TextCatEnsemble.v2"
nO = null

[components.textcat_multilabel.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
width = ${components.tok2vec.model.encode.width}

[components.textcat_multilabel.model.linear_model]
@architectures = "spacy.TextCatBOW.v3"
exclusive_classes = false
length = 262144
ngram_size = 1
no_output_layer = false

{% else -%}
[components.textcat_multilabel.model]
@architectures = "spacy.TextCatBOW.v3"
exclusive_classes = false
length = 262144
ngram_size = 1
no_output_layer = false
{%- endif %}
{%- endif %}
{% endif %}

{% for pipe in components %}
{% if pipe not in listener_components %}
{# Other components defined by the user: we just assume they're factories #}
[components.{{ pipe }}]
factory = "{{ pipe }}"
{% endif %}
{% endfor %}

[corpora]

[corpora.train]
@readers = "spacy.Corpus.v1"
path = ${paths.train}
max_length = 0

[corpora.dev]
@readers = "spacy.Corpus.v1"
path = ${paths.dev}
max_length = 0

[training]
{% if use_transformer -%}
accumulate_gradient = {{ transformer["size_factor"] }}
{% endif -%}
dev_corpus = "corpora.dev"
train_corpus = "corpora.train"

[training.optimizer]
@optimizers = "Adam.v1"

{% if use_transformer -%}
[training.optimizer.learn_rate]
@schedules = "warmup_linear.v1"
warmup_steps = 250
total_steps = 20000
initial_rate = 5e-5
{% endif %}

{% if use_transformer %}
[training.batcher]
@batchers = "spacy.batch_by_padded.v1"
discard_oversize = true
size = 2000
buffer = 256
{%- else %}
[training.batcher]
@batchers = "spacy.batch_by_words.v1"
discard_oversize = false
tolerance = 0.2

[training.batcher.size]
@schedules = "compounding.v1"
start = 100
stop = 1000
compound = 1.001
{% endif %}

[initialize]
vectors = ${paths.vectors}