Use increasing beam_update_prob in ud-train

This commit is contained in:
Matthew Honnibal 2018-05-16 23:21:53 +02:00
commit b9e415a5f8
2 changed files with 3 additions and 1 deletion

View File

@@ -196,6 +196,7 @@ def setup_package():
'plac<1.0.0,>=0.9.6',
'pathlib',
'ujson>=1.35',
'regex==2017.4.5',
'dill>=0.2,<0.3'],
setup_requires=['wheel'],
classifiers=[

View File

@@ -370,7 +370,7 @@ def main(ud_dir, parses_dir, config, corpus, limit=0, use_gpu=-1, vectors_dir=No
optimizer = initialize_pipeline(nlp, docs, golds, config, use_gpu)
batch_sizes = compounding(config.batch_size//10, config.batch_size, 1.001)
nlp.parser.cfg['beam_update_prob'] = 1.0
beam_prob = compounding(0.2, 0.8, 1.001)
for i in range(config.nr_epoch):
docs, golds = read_data(nlp, paths.train.conllu.open(), paths.train.text.open(),
max_doc_length=config.max_doc_length, limit=limit,
@@ -385,6 +385,7 @@ def main(ud_dir, parses_dir, config, corpus, limit=0, use_gpu=-1, vectors_dir=No
for batch in batches:
batch_docs, batch_gold = zip(*batch)
pbar.update(sum(len(doc) for doc in batch_docs))
nlp.parser.cfg['beam_update_prob'] = next(beam_prob)
nlp.update(batch_docs, batch_gold, sgd=optimizer,
drop=config.dropout, losses=losses)