Fix train loop to avoid swallowing tracebacks (#6693)

* Avoid swallowing tracebacks in train loop

* Format

* Handle first
This commit is contained in:
Matthew Honnibal 2021-01-09 11:25:47 +11:00 committed by GitHub
parent a612a5ba3f
commit c04bab6bae
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -59,6 +59,19 @@ def train(
batcher = T["batcher"] batcher = T["batcher"]
train_logger = T["logger"] train_logger = T["logger"]
before_to_disk = create_before_to_disk_callback(T["before_to_disk"]) before_to_disk = create_before_to_disk_callback(T["before_to_disk"])
# Helper function to save checkpoints. This is a closure for convenience,
# to avoid passing in all the args all the time.
def save_checkpoint(is_best):
with nlp.use_params(optimizer.averages):
before_to_disk(nlp).to_disk(output_path / DIR_MODEL_LAST)
if is_best:
# Avoid saving twice (saving will be more expensive than
# the dir copy)
if (output_path / DIR_MODEL_BEST).exists():
shutil.rmtree(output_path / DIR_MODEL_BEST)
shutil.copytree(output_path / DIR_MODEL_LAST, output_path / DIR_MODEL_BEST)
# Components that shouldn't be updated during training # Components that shouldn't be updated during training
frozen_components = T["frozen_components"] frozen_components = T["frozen_components"]
# Create iterator, which yields out info after each optimization step. # Create iterator, which yields out info after each optimization step.
@ -87,40 +100,31 @@ def train(
if is_best_checkpoint is not None and output_path is not None: if is_best_checkpoint is not None and output_path is not None:
with nlp.select_pipes(disable=frozen_components): with nlp.select_pipes(disable=frozen_components):
update_meta(T, nlp, info) update_meta(T, nlp, info)
with nlp.use_params(optimizer.averages): save_checkpoint(is_best_checkpoint)
nlp = before_to_disk(nlp)
nlp.to_disk(output_path / DIR_MODEL_LAST)
if is_best_checkpoint:
with nlp.use_params(optimizer.averages):
nlp.to_disk(output_path / DIR_MODEL_BEST)
except Exception as e: except Exception as e:
if output_path is not None: if output_path is not None:
# We don't want to swallow the traceback if we don't have a
# specific error, but we do want to warn that we're trying
# to do something here.
stdout.write( stdout.write(
msg.warn( msg.warn(
f"Aborting and saving the final best model. " f"Aborting and saving the final best model. "
f"Encountered exception: {str(e)}" f"Encountered exception: {repr(e)}"
) )
+ "\n" + "\n"
) )
raise e raise e
finally: finally:
finalize_logger() finalize_logger()
if optimizer.averages: save_checkpoint(False)
nlp.use_params(optimizer.averages) # This will only run if we didn't hit an error
if output_path is not None: if optimizer.averages:
final_model_path = output_path / DIR_MODEL_LAST nlp.use_params(optimizer.averages)
nlp.to_disk(final_model_path) if output_path is not None:
# This will only run if we don't hit an error stdout.write(
stdout.write( msg.good("Saved pipeline to output directory", output_path / DIR_MODEL_LAST)
msg.good("Saved pipeline to output directory", final_model_path) + "\n" + "\n"
) )
return (nlp, final_model_path) return (nlp, output_path / DIR_MODEL_LAST)
else: else:
return (nlp, None) return (nlp, None)
def train_while_improving( def train_while_improving(