run normal textcat train script with transformers (#4834)

* keep trf tok2vec and wordpiecer components during update

* also support transformer models for other example scripts
This commit is contained in:
Sofie Van Landeghem 2020-01-16 02:01:23 +01:00 committed by Matthew Honnibal
parent 90c52128dc
commit fbfc418745
8 changed files with 16 additions and 8 deletions

View File

@ -131,7 +131,8 @@ def train_textcat(nlp, n_texts, n_iter=10):
train_data = list(zip(train_texts, [{"cats": cats} for cats in train_cats]))
# get names of other pipes to disable them during training
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "textcat"]
pipe_exceptions = ["textcat", "trf_wordpiecer", "trf_tok2vec"]
other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
with nlp.disable_pipes(*other_pipes): # only train textcat
optimizer = nlp.begin_training()
textcat.model.tok2vec.from_bytes(tok2vec_weights)

View File

@ -63,7 +63,8 @@ def main(model_name, unlabelled_loc):
optimizer.b2 = 0.0
# get names of other pipes to disable them during training
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "ner"]
pipe_exceptions = ["ner", "trf_wordpiecer", "trf_tok2vec"]
other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
sizes = compounding(1.0, 4.0, 1.001)
with nlp.disable_pipes(*other_pipes):
for itn in range(n_iter):

View File

@ -113,7 +113,8 @@ def main(kb_path, vocab_path=None, output_dir=None, n_iter=50):
TRAIN_DOCS.append((doc, annotation_clean))
# get names of other pipes to disable them during training
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "entity_linker"]
pipe_exceptions = ["entity_linker", "trf_wordpiecer", "trf_tok2vec"]
other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
with nlp.disable_pipes(*other_pipes): # only train entity linker
# reset and initialize the weights randomly
optimizer = nlp.begin_training()

View File

@ -124,7 +124,8 @@ def main(model=None, output_dir=None, n_iter=15):
for dep in annotations.get("deps", []):
parser.add_label(dep)
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "parser"]
pipe_exceptions = ["parser", "trf_wordpiecer", "trf_tok2vec"]
other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
with nlp.disable_pipes(*other_pipes): # only train parser
optimizer = nlp.begin_training()
for itn in range(n_iter):

View File

@ -55,7 +55,8 @@ def main(model=None, output_dir=None, n_iter=100):
ner.add_label(ent[2])
# get names of other pipes to disable them during training
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "ner"]
pipe_exceptions = ["ner", "trf_wordpiecer", "trf_tok2vec"]
other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
with nlp.disable_pipes(*other_pipes): # only train NER
# reset and initialize the weights randomly but only if we're
# training a new model

View File

@ -95,7 +95,8 @@ def main(model=None, new_model_name="animal", output_dir=None, n_iter=30):
optimizer = nlp.resume_training()
move_names = list(ner.move_names)
# get names of other pipes to disable them during training
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "ner"]
pipe_exceptions = ["ner", "trf_wordpiecer", "trf_tok2vec"]
other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
with nlp.disable_pipes(*other_pipes): # only train NER
sizes = compounding(1.0, 4.0, 1.001)
# batch up the examples using spaCy's minibatch

View File

@ -65,7 +65,8 @@ def main(model=None, output_dir=None, n_iter=15):
parser.add_label(dep)
# get names of other pipes to disable them during training
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "parser"]
pipe_exceptions = ["parser", "trf_wordpiecer", "trf_tok2vec"]
other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
with nlp.disable_pipes(*other_pipes): # only train parser
optimizer = nlp.begin_training()
for itn in range(n_iter):

View File

@ -67,7 +67,8 @@ def main(model=None, output_dir=None, n_iter=20, n_texts=2000, init_tok2vec=None
train_data = list(zip(train_texts, [{"cats": cats} for cats in train_cats]))
# get names of other pipes to disable them during training
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "textcat"]
pipe_exceptions = ["textcat", "trf_wordpiecer", "trf_tok2vec"]
other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
with nlp.disable_pipes(*other_pipes): # only train textcat
optimizer = nlp.begin_training()
if init_tok2vec is not None: