# Training configuration for an English ("en") pipeline made of four
# components: tok2vec, ner, tagger and parser.
#
# Values of the form ${section:key} are interpolated from another section of
# this file. Keys starting with "@" name a registered function; the remaining
# keys in that section are its settings.
# NOTE: keep comments on their own lines -- inline comments after a value are
# not guaranteed to be stripped by the config parser.

# Input locations. Left empty / null here.
# NOTE(review): presumably filled in via overrides at run time -- confirm.
[paths]
train = ""
dev = ""
raw = null
init_tok2vec = null

[system]
seed = 0
use_pytorch_for_gpu_memory = false

[training]
# Reuse the system-wide seed and the tok2vec init path declared above.
seed = ${system:seed}
dropout = 0.1
init_tok2vec = ${paths:init_tok2vec}
vectors = null
accumulate_gradient = 1
max_steps = 0
max_epochs = 0
patience = 10000
eval_frequency = 200
# Relative weight of each metric in the combined score (weights sum to 1.0).
score_weights = {"dep_las": 0.4, "ents_f": 0.4, "tag_acc": 0.2}
frozen_components = []

[training.train_corpus]
@readers = "spacy.Corpus.v1"
path = ${paths:train}
gold_preproc = true
max_length = 0
limit = 0

[training.dev_corpus]
@readers = "spacy.Corpus.v1"
path = ${paths:dev}
# Mirror the training corpus setting so both corpora are read the same way.
# This must point at [training.train_corpus]; there is no
# [training.read_train] section in this config.
gold_preproc = ${training.train_corpus:gold_preproc}
max_length = 0
limit = 0

[training.batcher]
@batchers = "batch_by_words.v1"
discard_oversize = false
tolerance = 0.2

# Batch size is itself produced by a schedule rather than being a constant.
[training.batcher.size]
@schedules = "compounding.v1"
start = 100
stop = 1000
compound = 1.001

[training.optimizer]
@optimizers = "Adam.v1"
beta1 = 0.9
beta2 = 0.999
L2_is_weight_decay = true
L2 = 0.01
grad_clip = 1.0
use_averages = false
eps = 1e-8
learn_rate = 0.001

[nlp]
lang = "en"
load_vocab_data = false
# Every name listed here has a matching [components.<name>] section below.
pipeline = ["tok2vec", "ner", "tagger", "parser"]

[nlp.tokenizer]
@tokenizers = "spacy.Tokenizer.v1"

[nlp.lemmatizer]
@lemmatizers = "spacy.Lemmatizer.v1"

[components]

[components.tok2vec]
factory = "tok2vec"

[components.ner]
factory = "ner"
learn_tokens = false
min_action_freq = 1

[components.tagger]
factory = "tagger"

[components.parser]
factory = "parser"
learn_tokens = false
min_action_freq = 30

[components.tagger.model]
@architectures = "spacy.Tagger.v1"

# The tagger, parser and ner models each use a Tok2VecListener whose width is
# interpolated from the tok2vec encoder, so the value is set in one place only.
[components.tagger.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
width = ${components.tok2vec.model.encode:width}

[components.parser.model]
@architectures = "spacy.TransitionBasedParser.v1"
nr_feature_tokens = 8
hidden_width = 128
maxout_pieces = 2
use_upper = true

[components.parser.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
width = ${components.tok2vec.model.encode:width}

# Same architecture as the parser model; only nr_feature_tokens differs.
[components.ner.model]
@architectures = "spacy.TransitionBasedParser.v1"
nr_feature_tokens = 3
hidden_width = 128
maxout_pieces = 2
use_upper = true

[components.ner.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
width = ${components.tok2vec.model.encode:width}

[components.tok2vec.model]
@architectures = "spacy.Tok2Vec.v1"

[components.tok2vec.model.embed]
@architectures = "spacy.MultiHashEmbed.v1"
# The embedding width follows the encoder width defined below.
width = ${components.tok2vec.model.encode:width}
rows = 2000
also_embed_subwords = true
also_use_static_vectors = false

# Single source of truth for the width referenced by the embed layer and by
# every Tok2VecListener above.
[components.tok2vec.model.encode]
@architectures = "spacy.MaxoutWindowEncoder.v1"
width = 96
depth = 4
window_size = 1
maxout_pieces = 3