mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-10 19:57:17 +03:00
run normal textcat train script with transformers (#4834)
* keep trf tok2vec and wordpiecer components during update * also support transformer models for other example scripts
This commit is contained in:
parent
90c52128dc
commit
fbfc418745
|
@ -131,7 +131,8 @@ def train_textcat(nlp, n_texts, n_iter=10):
|
|||
train_data = list(zip(train_texts, [{"cats": cats} for cats in train_cats]))
|
||||
|
||||
# get names of other pipes to disable them during training
|
||||
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "textcat"]
|
||||
pipe_exceptions = ["textcat", "trf_wordpiecer", "trf_tok2vec"]
|
||||
other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
|
||||
with nlp.disable_pipes(*other_pipes): # only train textcat
|
||||
optimizer = nlp.begin_training()
|
||||
textcat.model.tok2vec.from_bytes(tok2vec_weights)
|
||||
|
|
|
@ -63,7 +63,8 @@ def main(model_name, unlabelled_loc):
|
|||
optimizer.b2 = 0.0
|
||||
|
||||
# get names of other pipes to disable them during training
|
||||
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "ner"]
|
||||
pipe_exceptions = ["ner", "trf_wordpiecer", "trf_tok2vec"]
|
||||
other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
|
||||
sizes = compounding(1.0, 4.0, 1.001)
|
||||
with nlp.disable_pipes(*other_pipes):
|
||||
for itn in range(n_iter):
|
||||
|
|
|
@ -113,7 +113,8 @@ def main(kb_path, vocab_path=None, output_dir=None, n_iter=50):
|
|||
TRAIN_DOCS.append((doc, annotation_clean))
|
||||
|
||||
# get names of other pipes to disable them during training
|
||||
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "entity_linker"]
|
||||
pipe_exceptions = ["entity_linker", "trf_wordpiecer", "trf_tok2vec"]
|
||||
other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
|
||||
with nlp.disable_pipes(*other_pipes): # only train entity linker
|
||||
# reset and initialize the weights randomly
|
||||
optimizer = nlp.begin_training()
|
||||
|
|
|
@ -124,7 +124,8 @@ def main(model=None, output_dir=None, n_iter=15):
|
|||
for dep in annotations.get("deps", []):
|
||||
parser.add_label(dep)
|
||||
|
||||
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "parser"]
|
||||
pipe_exceptions = ["parser", "trf_wordpiecer", "trf_tok2vec"]
|
||||
other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
|
||||
with nlp.disable_pipes(*other_pipes): # only train parser
|
||||
optimizer = nlp.begin_training()
|
||||
for itn in range(n_iter):
|
||||
|
|
|
@ -55,7 +55,8 @@ def main(model=None, output_dir=None, n_iter=100):
|
|||
ner.add_label(ent[2])
|
||||
|
||||
# get names of other pipes to disable them during training
|
||||
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "ner"]
|
||||
pipe_exceptions = ["ner", "trf_wordpiecer", "trf_tok2vec"]
|
||||
other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
|
||||
with nlp.disable_pipes(*other_pipes): # only train NER
|
||||
# reset and initialize the weights randomly – but only if we're
|
||||
# training a new model
|
||||
|
|
|
@ -95,7 +95,8 @@ def main(model=None, new_model_name="animal", output_dir=None, n_iter=30):
|
|||
optimizer = nlp.resume_training()
|
||||
move_names = list(ner.move_names)
|
||||
# get names of other pipes to disable them during training
|
||||
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "ner"]
|
||||
pipe_exceptions = ["ner", "trf_wordpiecer", "trf_tok2vec"]
|
||||
other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
|
||||
with nlp.disable_pipes(*other_pipes): # only train NER
|
||||
sizes = compounding(1.0, 4.0, 1.001)
|
||||
# batch up the examples using spaCy's minibatch
|
||||
|
|
|
@ -65,7 +65,8 @@ def main(model=None, output_dir=None, n_iter=15):
|
|||
parser.add_label(dep)
|
||||
|
||||
# get names of other pipes to disable them during training
|
||||
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "parser"]
|
||||
pipe_exceptions = ["parser", "trf_wordpiecer", "trf_tok2vec"]
|
||||
other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
|
||||
with nlp.disable_pipes(*other_pipes): # only train parser
|
||||
optimizer = nlp.begin_training()
|
||||
for itn in range(n_iter):
|
||||
|
|
|
@ -67,7 +67,8 @@ def main(model=None, output_dir=None, n_iter=20, n_texts=2000, init_tok2vec=None
|
|||
train_data = list(zip(train_texts, [{"cats": cats} for cats in train_cats]))
|
||||
|
||||
# get names of other pipes to disable them during training
|
||||
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "textcat"]
|
||||
pipe_exceptions = ["textcat", "trf_wordpiecer", "trf_tok2vec"]
|
||||
other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
|
||||
with nlp.disable_pipes(*other_pipes): # only train textcat
|
||||
optimizer = nlp.begin_training()
|
||||
if init_tok2vec is not None:
|
||||
|
|
Loading…
Reference in New Issue
Block a user