Add speed benchmarks to metadata

This commit is contained in:
Matthew Honnibal 2017-10-09 08:05:37 -05:00
parent d8a2506023
commit 0f41b25f60

View File

@ -114,15 +114,33 @@ def train(cmd, lang, output_dir, train_data, dev_data, n_iter=10, n_sents=0,
nlp.to_disk(epoch_model_path) nlp.to_disk(epoch_model_path)
nlp_loaded = lang_class(pipeline=pipeline) nlp_loaded = lang_class(pipeline=pipeline)
nlp_loaded = nlp_loaded.from_disk(epoch_model_path) nlp_loaded = nlp_loaded.from_disk(epoch_model_path)
scorer = nlp_loaded.evaluate( dev_docs = list(corpus.dev_docs(
list(corpus.dev_docs(
nlp_loaded, nlp_loaded,
gold_preproc=gold_preproc))) gold_preproc=gold_preproc))
nwords = sum(len(doc_gold[0]) for doc_gold in dev_docs)
start_time = timer()
scorer = nlp_loaded.evaluate(dev_docs)
end_time = timer()
if use_gpu < 0:
gpu_wps = None
cpu_wps = nwords/(end_time-start_time)
else:
gpu_wps = nwords/(end_time-start_time)
with Model.use_device('cpu'):
nlp_loaded = lang_class(pipeline=pipeline)
nlp_loaded = nlp_loaded.from_disk(epoch_model_path)
dev_docs = list(corpus.dev_docs(
nlp_loaded, gold_preproc=gold_preproc))
start_time = timer()
scorer = nlp_loaded.evaluate(dev_docs)
end_time = timer()
cpu_wps = nwords/(end_time-start_time)
acc_loc =(output_path / ('model%d' % i) / 'accuracy.json') acc_loc =(output_path / ('model%d' % i) / 'accuracy.json')
with acc_loc.open('w') as file_: with acc_loc.open('w') as file_:
file_.write(json_dumps(scorer.scores)) file_.write(json_dumps(scorer.scores))
meta_loc = output_path / ('model%d' % i) / 'meta.json' meta_loc = output_path / ('model%d' % i) / 'meta.json'
meta['accuracy'] = scorer.scores meta['accuracy'] = scorer.scores
meta['speed'] = {'nwords': nwords, 'cpu':cpu_wps, 'gpu': gpu_wps}
meta['lang'] = nlp.lang meta['lang'] = nlp.lang
meta['pipeline'] = pipeline meta['pipeline'] = pipeline
meta['spacy_version'] = '>=%s' % about.__version__ meta['spacy_version'] = '>=%s' % about.__version__
@ -132,7 +150,7 @@ def train(cmd, lang, output_dir, train_data, dev_data, n_iter=10, n_sents=0,
with meta_loc.open('w') as file_: with meta_loc.open('w') as file_:
file_.write(json_dumps(meta)) file_.write(json_dumps(meta))
util.set_env_log(True) util.set_env_log(True)
print_progress(i, losses, scorer.scores) print_progress(i, losses, scorer.scores, cpu_wps=cpu_wps, gpu_wps=gpu_wps)
finally: finally:
print("Saving model...") print("Saving model...")
try: try:
@ -153,16 +171,18 @@ def _render_parses(i, to_render):
file_.write(html) file_.write(html)
def print_progress(itn, losses, dev_scores, wps=0.0): def print_progress(itn, losses, dev_scores, cpu_wps=0.0, gpu_wps=0.0):
print(locals())
scores = {} scores = {}
for col in ['dep_loss', 'tag_loss', 'uas', 'tags_acc', 'token_acc', for col in ['dep_loss', 'tag_loss', 'uas', 'tags_acc', 'token_acc',
'ents_p', 'ents_r', 'ents_f', 'wps']: 'ents_p', 'ents_r', 'ents_f', 'cpu_wps', 'gpu_wps']:
scores[col] = 0.0 scores[col] = 0.0
scores['dep_loss'] = losses.get('parser', 0.0) scores['dep_loss'] = losses.get('parser', 0.0)
scores['ner_loss'] = losses.get('ner', 0.0) scores['ner_loss'] = losses.get('ner', 0.0)
scores['tag_loss'] = losses.get('tagger', 0.0) scores['tag_loss'] = losses.get('tagger', 0.0)
scores.update(dev_scores) scores.update(dev_scores)
scores['wps'] = wps scores['cpu_wps'] = cpu_wps
scores['gpu_wps'] = gpu_wps or 0.0
tpl = '\t'.join(( tpl = '\t'.join((
'{:d}', '{:d}',
'{dep_loss:.3f}', '{dep_loss:.3f}',
@ -173,7 +193,9 @@ def print_progress(itn, losses, dev_scores, wps=0.0):
'{ents_f:.3f}', '{ents_f:.3f}',
'{tags_acc:.3f}', '{tags_acc:.3f}',
'{token_acc:.3f}', '{token_acc:.3f}',
'{wps:.1f}')) '{cpu_wps:.1f}',
'{gpu_wps:.1f}',
))
print(tpl.format(itn, **scores)) print(tpl.format(itn, **scores))