{# Template for "CPU" configs. The transformer will use a different template. #}
{#
  Variables read by this template:
    lang        - language code, rendered as a quoted string
    pipeline    - pre-formatted pipeline list, inserted verbatim
    components  - collection of component names; selects the optional
                  sections and the score weights below
    optimize    - "efficiency" or "accuracy"; selects model sizes
    has_letters - truthy when subword features should be embedded

  Rendering notes:
    * Jinja prints Python booleans as "True"/"False", which are not valid
      config literals, so boolean settings emit the strings "true"/"false".
    * ${section:key} references are left for the config loader to resolve.
-#}
# This is an auto-generated partial config for training a model.
# To use it for training, auto-fill it with all default values.
# python -m spacy init config config.cfg --base base_config.cfg
[paths]
train = ""
dev = ""

[nlp]
lang = "{{ lang }}"
pipeline = {{ pipeline|safe }}
{#- NOTE(review): the fallback keeps the original "false" value (now rendered
    in lowercase); confirm whether the consumer expects null here instead. #}
vectors = {{ ('"en_vectors_web_lg"' if optimize == "accuracy" else "false")|safe }}
tokenizer = {"@tokenizers": "spacy.Tokenizer.v1"}

[components]

[components.tok2vec]
factory = "tok2vec"

[components.tok2vec.model]
@architectures = "spacy.Tok2Vec.v1"

[components.tok2vec.model.embed]
@architectures = "spacy.MultiHashEmbed.v1"
width = ${components.tok2vec.model.encode:width}
rows = {{ 2000 if optimize == "efficiency" else 7000 }}
also_embed_subwords = {{ "true" if has_letters else "false" }}
also_use_static_vectors = {{ "true" if optimize == "accuracy" else "false" }}

[components.tok2vec.model.encode]
@architectures = "spacy.MaxoutWindowEncoder.v1"
width = {{ 96 if optimize == "efficiency" else 256 }}
depth = {{ 4 if optimize == "efficiency" else 8 }}
window_size = 1
maxout_pieces = 3

{% if "tagger" in components -%}
[components.tagger]
factory = "tagger"

[components.tagger.model]
@architectures = "spacy.Tagger.v1"
nO = null

[components.tagger.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
width = ${components.tok2vec.model.encode:width}
{%- endif %}

{% if "parser" in components -%}
[components.parser]
factory = "parser"

[components.parser.model]
@architectures = "spacy.TransitionBasedParser.v1"
nr_feature_tokens = 8
hidden_width = 128
maxout_pieces = 3
use_upper = true
nO = null

[components.parser.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
width = ${components.tok2vec.model.encode:width}
{%- endif %}

{% if "ner" in components -%}
[components.ner]
factory = "ner"

[components.ner.model]
@architectures = "spacy.TransitionBasedParser.v1"
nr_feature_tokens = 6
hidden_width = 64
maxout_pieces = 2
use_upper = true
nO = null

[components.ner.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
width = ${components.tok2vec.model.encode:width}
{%- endif %}

[training]

[training.train_corpus]
@readers = "spacy.Corpus.v1"
path = ${paths:train}

[training.dev_corpus]
@readers = "spacy.Corpus.v1"
path = ${paths:dev}

{#
  Each selected component contributes one primary metric weighted at
  1 / (number of components). round(2) is required: the filter's default
  precision is 0, which would turn e.g. 1/3 into 0.0 and zero out every weight.
-#}
[training.score_weights]
{%- if "tagger" in components %}
tag_acc = {{ (1.0 / components|length)|round(2) }}
{%- endif %}
{%- if "parser" in components %}
dep_uas = 0.0
dep_las = {{ (1.0 / components|length)|round(2) }}
sents_f = 0.0
{%- endif %}
{%- if "ner" in components %}
ents_f = {{ (1.0 / components|length)|round(2) }}
ents_p = 0.0
ents_r = 0.0
{%- endif %}