Add words and seconds to train info

This commit is contained in:
Matthew Honnibal 2020-09-08 15:24:47 +02:00
parent b470062153
commit ba5f4c9b32

View File

@@ -1,4 +1,5 @@
from typing import Optional, Dict, Any, Tuple, Union, Callable, List from typing import Optional, Dict, Any, Tuple, Union, Callable, List
from timeit import default_timer as timer
import srsly import srsly
import tqdm import tqdm
from pathlib import Path from pathlib import Path
@@ -286,9 +287,12 @@ def train_while_improving(
] ]
raw_batches = util.minibatch(raw_examples, size=8) raw_batches = util.minibatch(raw_examples, size=8)
words_seen = 0
start_time = timer()
for step, (epoch, batch) in enumerate(train_data): for step, (epoch, batch) in enumerate(train_data):
dropout = next(dropouts) dropout = next(dropouts)
for subbatch in subdivide_batch(batch, accumulate_gradient): for subbatch in subdivide_batch(batch, accumulate_gradient):
nlp.update( nlp.update(
subbatch, drop=dropout, losses=losses, sgd=False, exclude=exclude subbatch, drop=dropout, losses=losses, sgd=False, exclude=exclude
) )
@@ -317,6 +321,7 @@ def train_while_improving(
else: else:
score, other_scores = (None, None) score, other_scores = (None, None)
is_best_checkpoint = None is_best_checkpoint = None
words_seen += sum(len(eg) for eg in batch)
info = { info = {
"epoch": epoch, "epoch": epoch,
"step": step, "step": step,
@@ -324,6 +329,8 @@ def train_while_improving(
"other_scores": other_scores, "other_scores": other_scores,
"losses": losses, "losses": losses,
"checkpoints": results, "checkpoints": results,
"seconds": int(timer() - start_time),
"words": words_seen,
} }
yield batch, info, is_best_checkpoint yield batch, info, is_best_checkpoint
if is_best_checkpoint is not None: if is_best_checkpoint is not None: