Improve control of training progress and logging (#6184)

* Make logging and progress easier to control

* Update docs

* Cleanup errors

* Fix ConfigValidationError

* Pass stdout/stderr, not wasabi.Printer

* Fix type

* Upd logging example

* Fix logger example

* Fix type
This commit is contained in:
Matthew Honnibal 2020-10-03 14:57:46 +02:00 committed by GitHub
parent 5fb776556a
commit db419f6b2f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 118 additions and 85 deletions

View File

@ -3,6 +3,7 @@ from pathlib import Path
from wasabi import msg
import typer
import logging
import sys
from ._util import app, Arg, Opt, parse_config_overrides, show_validation_error
from ._util import import_code, setup_gpu
@ -39,7 +40,12 @@ def train_cli(
DOCS: https://nightly.spacy.io/api/cli#train
"""
util.logger.setLevel(logging.DEBUG if verbose else logging.INFO)
verify_cli_args(config_path, output_path)
# Make sure all files and paths exists if they are needed
if not config_path or not config_path.exists():
msg.fail("Config file not found", config_path, exits=1)
if output_path is not None and not output_path.exists():
output_path.mkdir()
msg.good(f"Created output directory: {output_path}")
overrides = parse_config_overrides(ctx.args)
import_code(code_path)
setup_gpu(use_gpu)
@ -50,14 +56,4 @@ def train_cli(
nlp = init_nlp(config, use_gpu=use_gpu)
msg.good("Initialized pipeline")
msg.divider("Training pipeline")
train(nlp, output_path, use_gpu=use_gpu, silent=False)
def verify_cli_args(config_path: Path, output_path: Optional[Path] = None) -> None:
# Make sure all files and paths exists if they are needed
if not config_path or not config_path.exists():
msg.fail("Config file not found", config_path, exits=1)
if output_path is not None:
if not output_path.exists():
output_path.mkdir()
msg.good(f"Created output directory: {output_path}")
train(nlp, output_path, use_gpu=use_gpu, stdout=sys.stdout, stderr=sys.stderr)

View File

@ -102,7 +102,7 @@ def load_vectors_into_model(
"with the packaged vectors. Make sure that the vectors package you're "
"loading is compatible with the current version of spaCy."
)
err = ConfigValidationError.from_error(config=None, title=title, desc=desc)
err = ConfigValidationError.from_error(e, config=None, title=title, desc=desc)
raise err from None
nlp.vocab.vectors = vectors_nlp.vocab.vectors
if add_strings:

View File

@ -1,18 +1,24 @@
from typing import Dict, Any, Tuple, Callable, List
from typing import TYPE_CHECKING, Dict, Any, Tuple, Callable, List, Optional, IO
import wasabi
import tqdm
import sys
from ..util import registry
from .. import util
from ..errors import Errors
from wasabi import msg
@registry.loggers("spacy.ConsoleLogger.v1")
def console_logger():
def console_logger(progress_bar: bool=False):
def setup_printer(
nlp: "Language",
) -> Tuple[Callable[[Dict[str, Any]], None], Callable]:
stdout: IO=sys.stdout,
stderr: IO=sys.stderr
) -> Tuple[Callable[[Optional[Dict[str, Any]]], None], Callable]:
msg = wasabi.Printer(no_print=True)
# we assume here that only components are enabled that should be trained & logged
logged_pipes = nlp.pipe_names
eval_frequency = nlp.config["training"]["eval_frequency"]
score_weights = nlp.config["training"]["score_weights"]
score_cols = [col for col, value in score_weights.items() if value is not None]
score_widths = [max(len(col), 6) for col in score_cols]
@ -22,10 +28,18 @@ def console_logger():
table_header = [col.upper() for col in table_header]
table_widths = [3, 6] + loss_widths + score_widths + [6]
table_aligns = ["r" for _ in table_widths]
msg.row(table_header, widths=table_widths)
msg.row(["-" * width for width in table_widths])
stdout.write(msg.row(table_header, widths=table_widths))
stdout.write(msg.row(["-" * width for width in table_widths]))
progress = None
def log_step(info: Dict[str, Any]):
def log_step(info: Optional[Dict[str, Any]]):
nonlocal progress
if info is None:
# If we don't have a new checkpoint, just return.
if progress is not None:
progress.update(1)
return
try:
losses = [
"{0:.2f}".format(float(info["losses"][pipe_name]))
@ -39,24 +53,37 @@ def console_logger():
keys=list(info["losses"].keys()),
)
) from None
scores = []
for col in score_cols:
score = info["other_scores"].get(col, 0.0)
try:
score = float(score)
if col != "speed":
score *= 100
scores.append("{0:.2f}".format(score))
except TypeError:
err = Errors.E916.format(name=col, score_type=type(score))
raise ValueError(err) from None
if col != "speed":
score *= 100
scores.append("{0:.2f}".format(score))
data = (
[info["epoch"], info["step"]]
+ losses
+ scores
+ ["{0:.2f}".format(float(info["score"]))]
)
msg.row(data, widths=table_widths, aligns=table_aligns)
if progress is not None:
progress.close()
stdout.write(msg.row(data, widths=table_widths, aligns=table_aligns))
if progress_bar:
# Set disable=None, so that it disables on non-TTY
progress = tqdm.tqdm(
total=eval_frequency,
disable=None,
leave=False,
file=stderr
)
progress.set_description(f"Epoch {info['epoch']+1}")
def finalize():
pass
@ -70,10 +97,12 @@ def console_logger():
def wandb_logger(project_name: str, remove_config_values: List[str] = []):
import wandb
console = console_logger()
console = console_logger(progress_bar=False)
def setup_logger(
nlp: "Language",
stdout: IO=sys.stdout,
stderr: IO=sys.stderr
) -> Tuple[Callable[[Dict[str, Any]], None], Callable]:
config = nlp.config.interpolate()
config_dot = util.dict_to_dot(config)
@ -81,18 +110,19 @@ def wandb_logger(project_name: str, remove_config_values: List[str] = []):
del config_dot[field]
config = util.dot_to_dict(config_dot)
wandb.init(project=project_name, config=config, reinit=True)
console_log_step, console_finalize = console(nlp)
console_log_step, console_finalize = console(nlp, stdout, stderr)
def log_step(info: Dict[str, Any]):
def log_step(info: Optional[Dict[str, Any]]):
console_log_step(info)
score = info["score"]
other_scores = info["other_scores"]
losses = info["losses"]
wandb.log({"score": score})
if losses:
wandb.log({f"loss_{k}": v for k, v in losses.items()})
if isinstance(other_scores, dict):
wandb.log(other_scores)
if info is not None:
score = info["score"]
other_scores = info["other_scores"]
losses = info["losses"]
wandb.log({"score": score})
if losses:
wandb.log({f"loss_{k}": v for k, v in losses.items()})
if isinstance(other_scores, dict):
wandb.log(other_scores)
def finalize():
console_finalize()

View File

@ -1,11 +1,11 @@
from typing import List, Callable, Tuple, Dict, Iterable, Iterator, Union, Any
from typing import List, Callable, Tuple, Dict, Iterable, Iterator, Union, Any, IO
from typing import Optional, TYPE_CHECKING
from pathlib import Path
from timeit import default_timer as timer
from thinc.api import Optimizer, Config, constant, fix_random_seed, set_gpu_allocator
import random
import tqdm
from wasabi import Printer
import wasabi
import sys
from .example import Example
from ..schemas import ConfigSchemaTraining
@ -21,7 +21,8 @@ def train(
output_path: Optional[Path] = None,
*,
use_gpu: int = -1,
silent: bool = False,
stdout: IO=sys.stdout,
stderr: IO=sys.stderr
) -> None:
"""Train a pipeline.
@ -29,10 +30,15 @@ def train(
output_path (Path): Optional output path to save trained model to.
use_gpu (int): Whether to train on GPU. Make sure to call require_gpu
before calling this function.
silent (bool): Whether to pretty-print outputs.
stdout (file): A file-like object to write output messages. To disable
printing, set to io.StringIO.
stderr (file): A second file-like object to write output messages. To disable
printing, set to io.StringIO.
RETURNS (Path / None): The path to the final exported model.
"""
msg = Printer(no_print=silent)
# We use no_print here so we can respect the stdout/stderr options.
msg = wasabi.Printer(no_print=True)
# Create iterator, which yields out info after each optimization step.
config = nlp.config.interpolate()
if config["training"]["seed"] is not None:
@ -63,50 +69,44 @@ def train(
eval_frequency=T["eval_frequency"],
exclude=frozen_components,
)
msg.info(f"Pipeline: {nlp.pipe_names}")
stdout.write(msg.info(f"Pipeline: {nlp.pipe_names}"))
if frozen_components:
msg.info(f"Frozen components: {frozen_components}")
msg.info(f"Initial learn rate: {optimizer.learn_rate}")
stdout.write(msg.info(f"Frozen components: {frozen_components}"))
stdout.write(msg.info(f"Initial learn rate: {optimizer.learn_rate}"))
with nlp.select_pipes(disable=frozen_components):
print_row, finalize_logger = train_logger(nlp)
log_step, finalize_logger = train_logger(nlp, stdout, stderr)
try:
progress = tqdm.tqdm(total=T["eval_frequency"], leave=False)
progress.set_description(f"Epoch 1")
for batch, info, is_best_checkpoint in training_step_iterator:
progress.update(1)
if is_best_checkpoint is not None:
progress.close()
print_row(info)
if is_best_checkpoint and output_path is not None:
with nlp.select_pipes(disable=frozen_components):
update_meta(T, nlp, info)
with nlp.use_params(optimizer.averages):
nlp = before_to_disk(nlp)
nlp.to_disk(output_path / "model-best")
progress = tqdm.tqdm(total=T["eval_frequency"], leave=False)
progress.set_description(f"Epoch {info['epoch']}")
log_step(info if is_best_checkpoint else None)
if is_best_checkpoint is not None and output_path is not None:
with nlp.select_pipes(disable=frozen_components):
update_meta(T, nlp, info)
with nlp.use_params(optimizer.averages):
nlp = before_to_disk(nlp)
nlp.to_disk(output_path / "model-best")
except Exception as e:
finalize_logger()
if output_path is not None:
# We don't want to swallow the traceback if we don't have a
# specific error.
msg.warn(
f"Aborting and saving the final best model. "
f"Encountered exception: {str(e)}"
# specific error, but we do want to warn that we're trying
# to do something here.
stdout.write(
msg.warn(
f"Aborting and saving the final best model. "
f"Encountered exception: {str(e)}"
)
)
nlp = before_to_disk(nlp)
nlp.to_disk(output_path / "model-final")
raise e
finally:
finalize_logger()
if output_path is not None:
final_model_path = output_path / "model-final"
final_model_path = output_path / "model-last"
if optimizer.averages:
with nlp.use_params(optimizer.averages):
nlp.to_disk(final_model_path)
else:
nlp.to_disk(final_model_path)
msg.good(f"Saved pipeline to output directory", final_model_path)
# This will only run if we don't hit an error
stdout.write(msg.good("Saved pipeline to output directory", final_model_path))
def train_while_improving(

View File

@ -689,8 +689,8 @@ During training, the results of each step are passed to a logger function. By
default, these results are written to the console with the
[`ConsoleLogger`](/api/top-level#ConsoleLogger). There is also built-in support
for writing the log files to [Weights & Biases](https://www.wandb.com/) with the
[`WandbLogger`](/api/top-level#WandbLogger). The logger function receives a
**dictionary** with the following keys:
[`WandbLogger`](/api/top-level#WandbLogger). On each step, the logger function
receives a **dictionary** with the following keys:
| Key | Value |
| -------------- | ---------------------------------------------------------------------------------------------- |
@ -715,30 +715,37 @@ tabular results to a file:
```python
### functions.py
from typing import Tuple, Callable, Dict, Any
import sys
from typing import IO, Tuple, Callable, Dict, Any
import spacy
from spacy import Language
from pathlib import Path
@spacy.registry.loggers("my_custom_logger.v1")
def custom_logger(log_path):
def setup_logger(nlp: "Language") -> Tuple[Callable, Callable]:
with Path(log_path).open("w", encoding="utf8") as file_:
file_.write("step\\t")
file_.write("score\\t")
for pipe in nlp.pipe_names:
file_.write(f"loss_{pipe}\\t")
file_.write("\\n")
def setup_logger(
nlp: Language,
stdout: IO=sys.stdout,
stderr: IO=sys.stderr
) -> Tuple[Callable, Callable]:
stdout.write(f"Logging to {log_path}\n")
log_file = Path(log_path).open("w", encoding="utf8")
log_file.write("step\\t")
log_file.write("score\\t")
for pipe in nlp.pipe_names:
log_file.write(f"loss_{pipe}\\t")
log_file.write("\\n")
def log_step(info: Dict[str, Any]):
with Path(log_path).open("a") as file_:
file_.write(f"{info['step']}\\t")
file_.write(f"{info['score']}\\t")
def log_step(info: Optional[Dict[str, Any]]):
if info:
log_file.write(f"{info['step']}\\t")
log_file.write(f"{info['score']}\\t")
for pipe in nlp.pipe_names:
file_.write(f"{info['losses'][pipe]}\\t")
file_.write("\\n")
log_file.write(f"{info['losses'][pipe]}\\t")
log_file.write("\\n")
def finalize():
pass
log_file.close()
return log_step, finalize