diff --git a/pretrain.cfg b/pretrain.cfg deleted file mode 100644 index 50bd72350..000000000 --- a/pretrain.cfg +++ /dev/null @@ -1,218 +0,0 @@ -[paths] -train = null -dev = null -vectors = null -init_tok2vec = null -raw_text = null - -[system] -gpu_allocator = null -seed = 0 - -[nlp] -lang = "en" -pipeline = ["tok2vec","tagger","parser","ner"] -batch_size = 1000 -disabled = [] -before_creation = null -after_creation = null -after_pipeline_creation = null -tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"} - -[components] - -[components.ner] -factory = "ner" -moves = null -update_with_oracle_cut_size = 100 - -[components.ner.model] -@architectures = "spacy.TransitionBasedParser.v2" -state_type = "ner" -extra_state_tokens = false -hidden_width = 64 -maxout_pieces = 2 -use_upper = true -nO = null - -[components.ner.model.tok2vec] -@architectures = "spacy.Tok2VecListener.v1" -width = ${components.tok2vec.model.encode.width} -upstream = "*" - -[components.parser] -factory = "parser" -learn_tokens = false -min_action_freq = 30 -moves = null -update_with_oracle_cut_size = 100 - -[components.parser.model] -@architectures = "spacy.TransitionBasedParser.v2" -state_type = "parser" -extra_state_tokens = false -hidden_width = 128 -maxout_pieces = 3 -use_upper = true -nO = null - -[components.parser.model.tok2vec] -@architectures = "spacy.Tok2VecListener.v1" -width = ${components.tok2vec.model.encode.width} -upstream = "*" - -[components.tagger] -factory = "tagger" - -[components.tagger.model] -@architectures = "spacy.Tagger.v1" -nO = null - -[components.tagger.model.tok2vec] -@architectures = "spacy.Tok2VecListener.v1" -width = ${components.tok2vec.model.encode.width} -upstream = "*" - -[components.tok2vec] -factory = "tok2vec" - -[components.tok2vec.model] -@architectures = "spacy.Tok2Vec.v2" - -[components.tok2vec.model.embed] -@architectures = "spacy.MultiHashEmbed.v1" -width = ${components.tok2vec.model.encode.width} -attrs = ["NORM","PREFIX","SUFFIX","SHAPE"] -rows = [5000,2500,2500,2500] -include_static_vectors = false - -[components.tok2vec.model.encode] -@architectures = "spacy.MaxoutWindowEncoder.v2" -width = 96 -depth = 4 -window_size = 1 -maxout_pieces = 3 - -[corpora] - -[corpora.dev] -@readers = "spacy.Corpus.v1" -path = ${paths.dev} -max_length = 0 -gold_preproc = false -limit = 0 -augmenter = null - -[corpora.pretrain] -@readers = "spacy.JsonlCorpus.v1" -path = ${paths.raw_text} -min_length = 5 -max_length = 500 -limit = 0 - -[corpora.train] -@readers = "spacy.Corpus.v1" -path = ${paths.train} -max_length = 2000 -gold_preproc = false -limit = 0 -augmenter = null - -[training] -dev_corpus = "corpora.dev" -train_corpus = "corpora.train" -seed = ${system.seed} -gpu_allocator = ${system.gpu_allocator} -dropout = 0.1 -accumulate_gradient = 1 -patience = 1600 -max_epochs = 0 -max_steps = 20000 -eval_frequency = 200 -frozen_components = [] -before_to_disk = null - -[training.batcher] -@batchers = "spacy.batch_by_words.v1" -discard_oversize = false -tolerance = 0.2 -get_length = null - -[training.batcher.size] -@schedules = "compounding.v1" -start = 100 -stop = 1000 -compound = 1.001 -t = 0.0 - -[training.logger] -@loggers = "spacy.ConsoleLogger.v1" -progress_bar = false - -[training.optimizer] -@optimizers = "Adam.v1" -beta1 = 0.9 -beta2 = 0.999 -L2_is_weight_decay = true -L2 = 0.01 -grad_clip = 1.0 -use_averages = false -eps = 0.00000001 -learn_rate = 0.001 - -[training.score_weights] -dep_las_per_type = null -sents_p = null -sents_r = null -ents_per_type = null -tag_acc = 0.33 -dep_uas = 0.17 -dep_las = 0.17 -sents_f = 0.0 -ents_f = 0.33 -ents_p = 0.0 -ents_r = 0.0 - -[pretraining] -max_epochs = 1000 -dropout = 0.2 -n_save_every = null -component = "tok2vec" -layer = "" -corpus = "corpora.pretrain" - -[pretraining.batcher] -@batchers = "spacy.batch_by_words.v1" -size = 3000 -discard_oversize = false -tolerance = 0.2 -get_length = null - -[pretraining.objective] -@architectures = "spacy.PretrainCharacters.v1" -maxout_pieces = 3 -hidden_size = 300 -n_characters = 4 - -[pretraining.optimizer] -@optimizers = "Adam.v1" -beta1 = 0.9 -beta2 = 0.999 -L2_is_weight_decay = true -L2 = 0.01 -grad_clip = 1.0 -use_averages = true -eps = 0.00000001 -learn_rate = 0.001 - -[initialize] -vectors = null -init_tok2vec = ${paths.init_tok2vec} -vocab_data = null -lookups = null -before_init = null -after_init = null - -[initialize.components] - -[initialize.tokenizer] \ No newline at end of file diff --git a/pretrain_gpu.cfg b/pretrain_gpu.cfg deleted file mode 100644 index 6f9c9195d..000000000 --- a/pretrain_gpu.cfg +++ /dev/null @@ -1,217 +0,0 @@ -[paths] -train = null -dev = null -vectors = null -init_tok2vec = null -raw_text = null - -[system] -gpu_allocator = "pytorch" -seed = 0 - -[nlp] -lang = "en" -pipeline = ["transformer","tagger","parser","ner"] -batch_size = 128 -disabled = [] -before_creation = null -after_creation = null -after_pipeline_creation = null -tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"} - -[components] - -[components.ner] -factory = "ner" -moves = null -update_with_oracle_cut_size = 100 - -[components.ner.model] -@architectures = "spacy.TransitionBasedParser.v2" -state_type = "ner" -extra_state_tokens = false -hidden_width = 64 -maxout_pieces = 2 -use_upper = false -nO = null - -[components.ner.model.tok2vec] -@architectures = "spacy-transformers.TransformerListener.v1" -grad_factor = 1.0 -pooling = {"@layers":"reduce_mean.v1"} -upstream = "*" - -[components.parser] -factory = "parser" -learn_tokens = false -min_action_freq = 30 -moves = null -update_with_oracle_cut_size = 100 - -[components.parser.model] -@architectures = "spacy.TransitionBasedParser.v2" -state_type = "parser" -extra_state_tokens = false -hidden_width = 128 -maxout_pieces = 3 -use_upper = false -nO = null - -[components.parser.model.tok2vec] -@architectures = "spacy-transformers.TransformerListener.v1" -grad_factor = 1.0 -pooling = {"@layers":"reduce_mean.v1"} -upstream = "*" - -[components.tagger] -factory = "tagger" - -[components.tagger.model] -@architectures = "spacy.Tagger.v1" -nO = null - -[components.tagger.model.tok2vec] -@architectures = "spacy-transformers.TransformerListener.v1" -grad_factor = 1.0 -pooling = {"@layers":"reduce_mean.v1"} -upstream = "*" - -[components.transformer] -factory = "transformer" -max_batch_items = 4096 -set_extra_annotations = {"@annotation_setters":"spacy-transformers.null_annotation_setter.v1"} - -[components.transformer.model] -@architectures = "spacy-transformers.TransformerModel.v1" -name = "roberta-base" - -[components.transformer.model.get_spans] -@span_getters = "spacy-transformers.strided_spans.v1" -window = 128 -stride = 96 - -[components.transformer.model.tokenizer_config] -use_fast = true - -[corpora] - -[corpora.dev] -@readers = "spacy.Corpus.v1" -path = ${paths.dev} -max_length = 0 -gold_preproc = false -limit = 0 -augmenter = null - -[corpora.pretrain] -@readers = "spacy.JsonlCorpus.v1" -path = ${paths.raw_text} -min_length = 5 -max_length = 500 -limit = 0 - -[corpora.train] -@readers = "spacy.Corpus.v1" -path = ${paths.train} -max_length = 500 -gold_preproc = false -limit = 0 -augmenter = null - -[training] -accumulate_gradient = 3 -dev_corpus = "corpora.dev" -train_corpus = "corpora.train" -seed = ${system.seed} -gpu_allocator = ${system.gpu_allocator} -dropout = 0.1 -patience = 1600 -max_epochs = 0 -max_steps = 20000 -eval_frequency = 200 -frozen_components = [] -before_to_disk = null - -[training.batcher] -@batchers = "spacy.batch_by_padded.v1" -discard_oversize = true -size = 2000 -buffer = 256 -get_length = null - -[training.logger] -@loggers = "spacy.ConsoleLogger.v1" -progress_bar = false - -[training.optimizer] -@optimizers = "Adam.v1" -beta1 = 0.9 -beta2 = 0.999 -L2_is_weight_decay = true -L2 = 0.01 -grad_clip = 1.0 -use_averages = false -eps = 0.00000001 - -[training.optimizer.learn_rate] -@schedules = "warmup_linear.v1" -warmup_steps = 250 -total_steps = 20000 -initial_rate = 0.00005 - -[training.score_weights] -dep_las_per_type = null -sents_p = null -sents_r = null -ents_per_type = null -tag_acc = 0.33 -dep_uas = 0.17 -dep_las = 0.17 -sents_f = 0.0 -ents_f = 0.33 -ents_p = 0.0 -ents_r = 0.0 - -[pretraining] -max_epochs = 1000 -dropout = 0.2 -n_save_every = null -component = "tok2vec" -layer = "" -corpus = "corpora.pretrain" - -[pretraining.batcher] -@batchers = "spacy.batch_by_words.v1" -size = 3000 -discard_oversize = false -tolerance = 0.2 -get_length = null - -[pretraining.objective] -@architectures = "spacy.PretrainCharacters.v1" -maxout_pieces = 3 -hidden_size = 300 -n_characters = 4 - -[pretraining.optimizer] -@optimizers = "Adam.v1" -beta1 = 0.9 -beta2 = 0.999 -L2_is_weight_decay = true -L2 = 0.01 -grad_clip = 1.0 -use_averages = true -eps = 0.00000001 -learn_rate = 0.001 - -[initialize] -vectors = null -init_tok2vec = ${paths.init_tok2vec} -vocab_data = null -lookups = null -before_init = null -after_init = null - -[initialize.components] - -[initialize.tokenizer] \ No newline at end of file