mirror of
https://github.com/explosion/spaCy.git
synced 2025-03-28 05:44:13 +03:00
run normal textcat train script with transformers (#4834)
* keep the `trf_tok2vec` and `trf_wordpiecer` components enabled during update
* also support transformer models in the other example scripts
This commit is contained in:
parent
90c52128dc
commit
fbfc418745
|
@ -131,7 +131,8 @@ def train_textcat(nlp, n_texts, n_iter=10):
|
||||||
train_data = list(zip(train_texts, [{"cats": cats} for cats in train_cats]))
|
train_data = list(zip(train_texts, [{"cats": cats} for cats in train_cats]))
|
||||||
|
|
||||||
# get names of other pipes to disable them during training
|
# get names of other pipes to disable them during training
|
||||||
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "textcat"]
|
pipe_exceptions = ["textcat", "trf_wordpiecer", "trf_tok2vec"]
|
||||||
|
other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
|
||||||
with nlp.disable_pipes(*other_pipes): # only train textcat
|
with nlp.disable_pipes(*other_pipes): # only train textcat
|
||||||
optimizer = nlp.begin_training()
|
optimizer = nlp.begin_training()
|
||||||
textcat.model.tok2vec.from_bytes(tok2vec_weights)
|
textcat.model.tok2vec.from_bytes(tok2vec_weights)
|
||||||
|
|
|
@ -63,7 +63,8 @@ def main(model_name, unlabelled_loc):
|
||||||
optimizer.b2 = 0.0
|
optimizer.b2 = 0.0
|
||||||
|
|
||||||
# get names of other pipes to disable them during training
|
# get names of other pipes to disable them during training
|
||||||
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "ner"]
|
pipe_exceptions = ["ner", "trf_wordpiecer", "trf_tok2vec"]
|
||||||
|
other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
|
||||||
sizes = compounding(1.0, 4.0, 1.001)
|
sizes = compounding(1.0, 4.0, 1.001)
|
||||||
with nlp.disable_pipes(*other_pipes):
|
with nlp.disable_pipes(*other_pipes):
|
||||||
for itn in range(n_iter):
|
for itn in range(n_iter):
|
||||||
|
|
|
@ -113,7 +113,8 @@ def main(kb_path, vocab_path=None, output_dir=None, n_iter=50):
|
||||||
TRAIN_DOCS.append((doc, annotation_clean))
|
TRAIN_DOCS.append((doc, annotation_clean))
|
||||||
|
|
||||||
# get names of other pipes to disable them during training
|
# get names of other pipes to disable them during training
|
||||||
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "entity_linker"]
|
pipe_exceptions = ["entity_linker", "trf_wordpiecer", "trf_tok2vec"]
|
||||||
|
other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
|
||||||
with nlp.disable_pipes(*other_pipes): # only train entity linker
|
with nlp.disable_pipes(*other_pipes): # only train entity linker
|
||||||
# reset and initialize the weights randomly
|
# reset and initialize the weights randomly
|
||||||
optimizer = nlp.begin_training()
|
optimizer = nlp.begin_training()
|
||||||
|
|
|
@ -124,7 +124,8 @@ def main(model=None, output_dir=None, n_iter=15):
|
||||||
for dep in annotations.get("deps", []):
|
for dep in annotations.get("deps", []):
|
||||||
parser.add_label(dep)
|
parser.add_label(dep)
|
||||||
|
|
||||||
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "parser"]
|
pipe_exceptions = ["parser", "trf_wordpiecer", "trf_tok2vec"]
|
||||||
|
other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
|
||||||
with nlp.disable_pipes(*other_pipes): # only train parser
|
with nlp.disable_pipes(*other_pipes): # only train parser
|
||||||
optimizer = nlp.begin_training()
|
optimizer = nlp.begin_training()
|
||||||
for itn in range(n_iter):
|
for itn in range(n_iter):
|
||||||
|
|
|
@ -55,7 +55,8 @@ def main(model=None, output_dir=None, n_iter=100):
|
||||||
ner.add_label(ent[2])
|
ner.add_label(ent[2])
|
||||||
|
|
||||||
# get names of other pipes to disable them during training
|
# get names of other pipes to disable them during training
|
||||||
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "ner"]
|
pipe_exceptions = ["ner", "trf_wordpiecer", "trf_tok2vec"]
|
||||||
|
other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
|
||||||
with nlp.disable_pipes(*other_pipes): # only train NER
|
with nlp.disable_pipes(*other_pipes): # only train NER
|
||||||
# reset and initialize the weights randomly – but only if we're
|
# reset and initialize the weights randomly – but only if we're
|
||||||
# training a new model
|
# training a new model
|
||||||
|
|
|
@ -95,7 +95,8 @@ def main(model=None, new_model_name="animal", output_dir=None, n_iter=30):
|
||||||
optimizer = nlp.resume_training()
|
optimizer = nlp.resume_training()
|
||||||
move_names = list(ner.move_names)
|
move_names = list(ner.move_names)
|
||||||
# get names of other pipes to disable them during training
|
# get names of other pipes to disable them during training
|
||||||
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "ner"]
|
pipe_exceptions = ["ner", "trf_wordpiecer", "trf_tok2vec"]
|
||||||
|
other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
|
||||||
with nlp.disable_pipes(*other_pipes): # only train NER
|
with nlp.disable_pipes(*other_pipes): # only train NER
|
||||||
sizes = compounding(1.0, 4.0, 1.001)
|
sizes = compounding(1.0, 4.0, 1.001)
|
||||||
# batch up the examples using spaCy's minibatch
|
# batch up the examples using spaCy's minibatch
|
||||||
|
|
|
@ -65,7 +65,8 @@ def main(model=None, output_dir=None, n_iter=15):
|
||||||
parser.add_label(dep)
|
parser.add_label(dep)
|
||||||
|
|
||||||
# get names of other pipes to disable them during training
|
# get names of other pipes to disable them during training
|
||||||
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "parser"]
|
pipe_exceptions = ["parser", "trf_wordpiecer", "trf_tok2vec"]
|
||||||
|
other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
|
||||||
with nlp.disable_pipes(*other_pipes): # only train parser
|
with nlp.disable_pipes(*other_pipes): # only train parser
|
||||||
optimizer = nlp.begin_training()
|
optimizer = nlp.begin_training()
|
||||||
for itn in range(n_iter):
|
for itn in range(n_iter):
|
||||||
|
|
|
@ -67,7 +67,8 @@ def main(model=None, output_dir=None, n_iter=20, n_texts=2000, init_tok2vec=None
|
||||||
train_data = list(zip(train_texts, [{"cats": cats} for cats in train_cats]))
|
train_data = list(zip(train_texts, [{"cats": cats} for cats in train_cats]))
|
||||||
|
|
||||||
# get names of other pipes to disable them during training
|
# get names of other pipes to disable them during training
|
||||||
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "textcat"]
|
pipe_exceptions = ["textcat", "trf_wordpiecer", "trf_tok2vec"]
|
||||||
|
other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
|
||||||
with nlp.disable_pipes(*other_pipes): # only train textcat
|
with nlp.disable_pipes(*other_pipes): # only train textcat
|
||||||
optimizer = nlp.begin_training()
|
optimizer = nlp.begin_training()
|
||||||
if init_tok2vec is not None:
|
if init_tok2vec is not None:
|
||||||
|
|
Loading…
Reference in New Issue
Block a user