mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-24 08:14:15 +03:00
Fix train loop to avoid swallowing tracebacks (#6693)
* Avoid swallowing tracebacks in train loop
* Format
* Handle first
This commit is contained in:
parent
a612a5ba3f
commit
c04bab6bae
|
@ -59,6 +59,19 @@ def train(
|
||||||
batcher = T["batcher"]
|
batcher = T["batcher"]
|
||||||
train_logger = T["logger"]
|
train_logger = T["logger"]
|
||||||
before_to_disk = create_before_to_disk_callback(T["before_to_disk"])
|
before_to_disk = create_before_to_disk_callback(T["before_to_disk"])
|
||||||
|
|
||||||
|
# Helper function to save checkpoints. This is a closure for convenience,
|
||||||
|
# to avoid passing in all the args all the time.
|
||||||
|
def save_checkpoint(is_best):
|
||||||
|
with nlp.use_params(optimizer.averages):
|
||||||
|
before_to_disk(nlp).to_disk(output_path / DIR_MODEL_LAST)
|
||||||
|
if is_best:
|
||||||
|
# Avoid saving twice (saving will be more expensive than
|
||||||
|
# the dir copy)
|
||||||
|
if (output_path / DIR_MODEL_BEST).exists():
|
||||||
|
shutil.rmtree(output_path / DIR_MODEL_BEST)
|
||||||
|
shutil.copytree(output_path / DIR_MODEL_LAST, output_path / DIR_MODEL_BEST)
|
||||||
|
|
||||||
# Components that shouldn't be updated during training
|
# Components that shouldn't be updated during training
|
||||||
frozen_components = T["frozen_components"]
|
frozen_components = T["frozen_components"]
|
||||||
# Create iterator, which yields out info after each optimization step.
|
# Create iterator, which yields out info after each optimization step.
|
||||||
|
@ -87,40 +100,31 @@ def train(
|
||||||
if is_best_checkpoint is not None and output_path is not None:
|
if is_best_checkpoint is not None and output_path is not None:
|
||||||
with nlp.select_pipes(disable=frozen_components):
|
with nlp.select_pipes(disable=frozen_components):
|
||||||
update_meta(T, nlp, info)
|
update_meta(T, nlp, info)
|
||||||
with nlp.use_params(optimizer.averages):
|
save_checkpoint(is_best_checkpoint)
|
||||||
nlp = before_to_disk(nlp)
|
|
||||||
nlp.to_disk(output_path / DIR_MODEL_LAST)
|
|
||||||
if is_best_checkpoint:
|
|
||||||
with nlp.use_params(optimizer.averages):
|
|
||||||
nlp.to_disk(output_path / DIR_MODEL_BEST)
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if output_path is not None:
|
if output_path is not None:
|
||||||
# We don't want to swallow the traceback if we don't have a
|
|
||||||
# specific error, but we do want to warn that we're trying
|
|
||||||
# to do something here.
|
|
||||||
stdout.write(
|
stdout.write(
|
||||||
msg.warn(
|
msg.warn(
|
||||||
f"Aborting and saving the final best model. "
|
f"Aborting and saving the final best model. "
|
||||||
f"Encountered exception: {str(e)}"
|
f"Encountered exception: {repr(e)}"
|
||||||
)
|
)
|
||||||
+ "\n"
|
+ "\n"
|
||||||
)
|
)
|
||||||
raise e
|
raise e
|
||||||
finally:
|
finally:
|
||||||
finalize_logger()
|
finalize_logger()
|
||||||
if optimizer.averages:
|
save_checkpoint(False)
|
||||||
nlp.use_params(optimizer.averages)
|
# This will only run if we didn't hit an error
|
||||||
if output_path is not None:
|
if optimizer.averages:
|
||||||
final_model_path = output_path / DIR_MODEL_LAST
|
nlp.use_params(optimizer.averages)
|
||||||
nlp.to_disk(final_model_path)
|
if output_path is not None:
|
||||||
# This will only run if we don't hit an error
|
stdout.write(
|
||||||
stdout.write(
|
msg.good("Saved pipeline to output directory", output_path / DIR_MODEL_LAST)
|
||||||
msg.good("Saved pipeline to output directory", final_model_path) + "\n"
|
+ "\n"
|
||||||
)
|
)
|
||||||
return (nlp, final_model_path)
|
return (nlp, output_path / DIR_MODEL_LAST)
|
||||||
else:
|
else:
|
||||||
return (nlp, None)
|
return (nlp, None)
|
||||||
|
|
||||||
|
|
||||||
def train_while_improving(
|
def train_while_improving(
|
||||||
|
|
Loading…
Reference in New Issue
Block a user