mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-25 00:34:20 +03:00
Improve control of training progress and logging (#6184)
* Make logging and progress easier to control * Update docs * Cleanup errors * Fix ConfigValidationError * Pass stdout/stderr, not wasabi.Printer * Fix type * Upd logging example * Fix logger example * Fix type
This commit is contained in:
parent
5fb776556a
commit
db419f6b2f
|
@ -3,6 +3,7 @@ from pathlib import Path
|
|||
from wasabi import msg
|
||||
import typer
|
||||
import logging
|
||||
import sys
|
||||
|
||||
from ._util import app, Arg, Opt, parse_config_overrides, show_validation_error
|
||||
from ._util import import_code, setup_gpu
|
||||
|
@ -39,7 +40,12 @@ def train_cli(
|
|||
DOCS: https://nightly.spacy.io/api/cli#train
|
||||
"""
|
||||
util.logger.setLevel(logging.DEBUG if verbose else logging.INFO)
|
||||
verify_cli_args(config_path, output_path)
|
||||
# Make sure all files and paths exists if they are needed
|
||||
if not config_path or not config_path.exists():
|
||||
msg.fail("Config file not found", config_path, exits=1)
|
||||
if output_path is not None and not output_path.exists():
|
||||
output_path.mkdir()
|
||||
msg.good(f"Created output directory: {output_path}")
|
||||
overrides = parse_config_overrides(ctx.args)
|
||||
import_code(code_path)
|
||||
setup_gpu(use_gpu)
|
||||
|
@ -50,14 +56,4 @@ def train_cli(
|
|||
nlp = init_nlp(config, use_gpu=use_gpu)
|
||||
msg.good("Initialized pipeline")
|
||||
msg.divider("Training pipeline")
|
||||
train(nlp, output_path, use_gpu=use_gpu, silent=False)
|
||||
|
||||
|
||||
def verify_cli_args(config_path: Path, output_path: Optional[Path] = None) -> None:
|
||||
# Make sure all files and paths exists if they are needed
|
||||
if not config_path or not config_path.exists():
|
||||
msg.fail("Config file not found", config_path, exits=1)
|
||||
if output_path is not None:
|
||||
if not output_path.exists():
|
||||
output_path.mkdir()
|
||||
msg.good(f"Created output directory: {output_path}")
|
||||
train(nlp, output_path, use_gpu=use_gpu, stdout=sys.stdout, stderr=sys.stderr)
|
||||
|
|
|
@ -102,7 +102,7 @@ def load_vectors_into_model(
|
|||
"with the packaged vectors. Make sure that the vectors package you're "
|
||||
"loading is compatible with the current version of spaCy."
|
||||
)
|
||||
err = ConfigValidationError.from_error(config=None, title=title, desc=desc)
|
||||
err = ConfigValidationError.from_error(e, config=None, title=title, desc=desc)
|
||||
raise err from None
|
||||
nlp.vocab.vectors = vectors_nlp.vocab.vectors
|
||||
if add_strings:
|
||||
|
|
|
@ -1,18 +1,24 @@
|
|||
from typing import Dict, Any, Tuple, Callable, List
|
||||
from typing import TYPE_CHECKING, Dict, Any, Tuple, Callable, List, Optional, IO
|
||||
import wasabi
|
||||
import tqdm
|
||||
import sys
|
||||
|
||||
from ..util import registry
|
||||
from .. import util
|
||||
from ..errors import Errors
|
||||
from wasabi import msg
|
||||
|
||||
|
||||
@registry.loggers("spacy.ConsoleLogger.v1")
|
||||
def console_logger():
|
||||
def console_logger(progress_bar: bool=False):
|
||||
def setup_printer(
|
||||
nlp: "Language",
|
||||
) -> Tuple[Callable[[Dict[str, Any]], None], Callable]:
|
||||
stdout: IO=sys.stdout,
|
||||
stderr: IO=sys.stderr
|
||||
) -> Tuple[Callable[[Optional[Dict[str, Any]]], None], Callable]:
|
||||
msg = wasabi.Printer(no_print=True)
|
||||
# we assume here that only components are enabled that should be trained & logged
|
||||
logged_pipes = nlp.pipe_names
|
||||
eval_frequency = nlp.config["training"]["eval_frequency"]
|
||||
score_weights = nlp.config["training"]["score_weights"]
|
||||
score_cols = [col for col, value in score_weights.items() if value is not None]
|
||||
score_widths = [max(len(col), 6) for col in score_cols]
|
||||
|
@ -22,10 +28,18 @@ def console_logger():
|
|||
table_header = [col.upper() for col in table_header]
|
||||
table_widths = [3, 6] + loss_widths + score_widths + [6]
|
||||
table_aligns = ["r" for _ in table_widths]
|
||||
msg.row(table_header, widths=table_widths)
|
||||
msg.row(["-" * width for width in table_widths])
|
||||
stdout.write(msg.row(table_header, widths=table_widths))
|
||||
stdout.write(msg.row(["-" * width for width in table_widths]))
|
||||
progress = None
|
||||
|
||||
def log_step(info: Dict[str, Any]):
|
||||
def log_step(info: Optional[Dict[str, Any]]):
|
||||
nonlocal progress
|
||||
|
||||
if info is None:
|
||||
# If we don't have a new checkpoint, just return.
|
||||
if progress is not None:
|
||||
progress.update(1)
|
||||
return
|
||||
try:
|
||||
losses = [
|
||||
"{0:.2f}".format(float(info["losses"][pipe_name]))
|
||||
|
@ -39,24 +53,37 @@ def console_logger():
|
|||
keys=list(info["losses"].keys()),
|
||||
)
|
||||
) from None
|
||||
|
||||
scores = []
|
||||
for col in score_cols:
|
||||
score = info["other_scores"].get(col, 0.0)
|
||||
try:
|
||||
score = float(score)
|
||||
if col != "speed":
|
||||
score *= 100
|
||||
scores.append("{0:.2f}".format(score))
|
||||
except TypeError:
|
||||
err = Errors.E916.format(name=col, score_type=type(score))
|
||||
raise ValueError(err) from None
|
||||
if col != "speed":
|
||||
score *= 100
|
||||
scores.append("{0:.2f}".format(score))
|
||||
|
||||
data = (
|
||||
[info["epoch"], info["step"]]
|
||||
+ losses
|
||||
+ scores
|
||||
+ ["{0:.2f}".format(float(info["score"]))]
|
||||
)
|
||||
msg.row(data, widths=table_widths, aligns=table_aligns)
|
||||
if progress is not None:
|
||||
progress.close()
|
||||
stdout.write(msg.row(data, widths=table_widths, aligns=table_aligns))
|
||||
if progress_bar:
|
||||
# Set disable=None, so that it disables on non-TTY
|
||||
progress = tqdm.tqdm(
|
||||
total=eval_frequency,
|
||||
disable=None,
|
||||
leave=False,
|
||||
file=stderr
|
||||
)
|
||||
progress.set_description(f"Epoch {info['epoch']+1}")
|
||||
|
||||
def finalize():
|
||||
pass
|
||||
|
@ -70,10 +97,12 @@ def console_logger():
|
|||
def wandb_logger(project_name: str, remove_config_values: List[str] = []):
|
||||
import wandb
|
||||
|
||||
console = console_logger()
|
||||
console = console_logger(progress_bar=False)
|
||||
|
||||
def setup_logger(
|
||||
nlp: "Language",
|
||||
stdout: IO=sys.stdout,
|
||||
stderr: IO=sys.stderr
|
||||
) -> Tuple[Callable[[Dict[str, Any]], None], Callable]:
|
||||
config = nlp.config.interpolate()
|
||||
config_dot = util.dict_to_dot(config)
|
||||
|
@ -81,18 +110,19 @@ def wandb_logger(project_name: str, remove_config_values: List[str] = []):
|
|||
del config_dot[field]
|
||||
config = util.dot_to_dict(config_dot)
|
||||
wandb.init(project=project_name, config=config, reinit=True)
|
||||
console_log_step, console_finalize = console(nlp)
|
||||
console_log_step, console_finalize = console(nlp, stdout, stderr)
|
||||
|
||||
def log_step(info: Dict[str, Any]):
|
||||
def log_step(info: Optional[Dict[str, Any]]):
|
||||
console_log_step(info)
|
||||
score = info["score"]
|
||||
other_scores = info["other_scores"]
|
||||
losses = info["losses"]
|
||||
wandb.log({"score": score})
|
||||
if losses:
|
||||
wandb.log({f"loss_{k}": v for k, v in losses.items()})
|
||||
if isinstance(other_scores, dict):
|
||||
wandb.log(other_scores)
|
||||
if info is not None:
|
||||
score = info["score"]
|
||||
other_scores = info["other_scores"]
|
||||
losses = info["losses"]
|
||||
wandb.log({"score": score})
|
||||
if losses:
|
||||
wandb.log({f"loss_{k}": v for k, v in losses.items()})
|
||||
if isinstance(other_scores, dict):
|
||||
wandb.log(other_scores)
|
||||
|
||||
def finalize():
|
||||
console_finalize()
|
||||
|
|
|
@ -1,11 +1,11 @@
|
|||
from typing import List, Callable, Tuple, Dict, Iterable, Iterator, Union, Any
|
||||
from typing import List, Callable, Tuple, Dict, Iterable, Iterator, Union, Any, IO
|
||||
from typing import Optional, TYPE_CHECKING
|
||||
from pathlib import Path
|
||||
from timeit import default_timer as timer
|
||||
from thinc.api import Optimizer, Config, constant, fix_random_seed, set_gpu_allocator
|
||||
import random
|
||||
import tqdm
|
||||
from wasabi import Printer
|
||||
import wasabi
|
||||
import sys
|
||||
|
||||
from .example import Example
|
||||
from ..schemas import ConfigSchemaTraining
|
||||
|
@ -21,7 +21,8 @@ def train(
|
|||
output_path: Optional[Path] = None,
|
||||
*,
|
||||
use_gpu: int = -1,
|
||||
silent: bool = False,
|
||||
stdout: IO=sys.stdout,
|
||||
stderr: IO=sys.stderr
|
||||
) -> None:
|
||||
"""Train a pipeline.
|
||||
|
||||
|
@ -29,10 +30,15 @@ def train(
|
|||
output_path (Path): Optional output path to save trained model to.
|
||||
use_gpu (int): Whether to train on GPU. Make sure to call require_gpu
|
||||
before calling this function.
|
||||
silent (bool): Whether to pretty-print outputs.
|
||||
stdout (file): A file-like object to write output messages. To disable
|
||||
printing, set to io.StringIO.
|
||||
stderr (file): A second file-like object to write output messages. To disable
|
||||
printing, set to io.StringIO.
|
||||
|
||||
RETURNS (Path / None): The path to the final exported model.
|
||||
"""
|
||||
msg = Printer(no_print=silent)
|
||||
# We use no_print here so we can respect the stdout/stderr options.
|
||||
msg = wasabi.Printer(no_print=True)
|
||||
# Create iterator, which yields out info after each optimization step.
|
||||
config = nlp.config.interpolate()
|
||||
if config["training"]["seed"] is not None:
|
||||
|
@ -63,50 +69,44 @@ def train(
|
|||
eval_frequency=T["eval_frequency"],
|
||||
exclude=frozen_components,
|
||||
)
|
||||
msg.info(f"Pipeline: {nlp.pipe_names}")
|
||||
stdout.write(msg.info(f"Pipeline: {nlp.pipe_names}"))
|
||||
if frozen_components:
|
||||
msg.info(f"Frozen components: {frozen_components}")
|
||||
msg.info(f"Initial learn rate: {optimizer.learn_rate}")
|
||||
stdout.write(msg.info(f"Frozen components: {frozen_components}"))
|
||||
stdout.write(msg.info(f"Initial learn rate: {optimizer.learn_rate}"))
|
||||
with nlp.select_pipes(disable=frozen_components):
|
||||
print_row, finalize_logger = train_logger(nlp)
|
||||
log_step, finalize_logger = train_logger(nlp, stdout, stderr)
|
||||
try:
|
||||
progress = tqdm.tqdm(total=T["eval_frequency"], leave=False)
|
||||
progress.set_description(f"Epoch 1")
|
||||
for batch, info, is_best_checkpoint in training_step_iterator:
|
||||
progress.update(1)
|
||||
if is_best_checkpoint is not None:
|
||||
progress.close()
|
||||
print_row(info)
|
||||
if is_best_checkpoint and output_path is not None:
|
||||
with nlp.select_pipes(disable=frozen_components):
|
||||
update_meta(T, nlp, info)
|
||||
with nlp.use_params(optimizer.averages):
|
||||
nlp = before_to_disk(nlp)
|
||||
nlp.to_disk(output_path / "model-best")
|
||||
progress = tqdm.tqdm(total=T["eval_frequency"], leave=False)
|
||||
progress.set_description(f"Epoch {info['epoch']}")
|
||||
log_step(info if is_best_checkpoint else None)
|
||||
if is_best_checkpoint is not None and output_path is not None:
|
||||
with nlp.select_pipes(disable=frozen_components):
|
||||
update_meta(T, nlp, info)
|
||||
with nlp.use_params(optimizer.averages):
|
||||
nlp = before_to_disk(nlp)
|
||||
nlp.to_disk(output_path / "model-best")
|
||||
except Exception as e:
|
||||
finalize_logger()
|
||||
if output_path is not None:
|
||||
# We don't want to swallow the traceback if we don't have a
|
||||
# specific error.
|
||||
msg.warn(
|
||||
f"Aborting and saving the final best model. "
|
||||
f"Encountered exception: {str(e)}"
|
||||
# specific error, but we do want to warn that we're trying
|
||||
# to do something here.
|
||||
stdout.write(
|
||||
msg.warn(
|
||||
f"Aborting and saving the final best model. "
|
||||
f"Encountered exception: {str(e)}"
|
||||
)
|
||||
)
|
||||
nlp = before_to_disk(nlp)
|
||||
nlp.to_disk(output_path / "model-final")
|
||||
raise e
|
||||
finally:
|
||||
finalize_logger()
|
||||
if output_path is not None:
|
||||
final_model_path = output_path / "model-final"
|
||||
final_model_path = output_path / "model-last"
|
||||
if optimizer.averages:
|
||||
with nlp.use_params(optimizer.averages):
|
||||
nlp.to_disk(final_model_path)
|
||||
else:
|
||||
nlp.to_disk(final_model_path)
|
||||
msg.good(f"Saved pipeline to output directory", final_model_path)
|
||||
# This will only run if we don't hit an error
|
||||
stdout.write(msg.good("Saved pipeline to output directory", final_model_path))
|
||||
|
||||
|
||||
def train_while_improving(
|
||||
|
|
|
@ -689,8 +689,8 @@ During training, the results of each step are passed to a logger function. By
|
|||
default, these results are written to the console with the
|
||||
[`ConsoleLogger`](/api/top-level#ConsoleLogger). There is also built-in support
|
||||
for writing the log files to [Weights & Biases](https://www.wandb.com/) with the
|
||||
[`WandbLogger`](/api/top-level#WandbLogger). The logger function receives a
|
||||
**dictionary** with the following keys:
|
||||
[`WandbLogger`](/api/top-level#WandbLogger). On each step, the logger function
|
||||
receives a **dictionary** with the following keys:
|
||||
|
||||
| Key | Value |
|
||||
| -------------- | ---------------------------------------------------------------------------------------------- |
|
||||
|
@ -715,30 +715,37 @@ tabular results to a file:
|
|||
|
||||
```python
|
||||
### functions.py
|
||||
from typing import Tuple, Callable, Dict, Any
|
||||
import sys
|
||||
from typing import IO, Tuple, Callable, Dict, Any
|
||||
import spacy
|
||||
from spacy import Language
|
||||
from pathlib import Path
|
||||
|
||||
@spacy.registry.loggers("my_custom_logger.v1")
|
||||
def custom_logger(log_path):
|
||||
def setup_logger(nlp: "Language") -> Tuple[Callable, Callable]:
|
||||
with Path(log_path).open("w", encoding="utf8") as file_:
|
||||
file_.write("step\\t")
|
||||
file_.write("score\\t")
|
||||
for pipe in nlp.pipe_names:
|
||||
file_.write(f"loss_{pipe}\\t")
|
||||
file_.write("\\n")
|
||||
def setup_logger(
|
||||
nlp: Language,
|
||||
stdout: IO=sys.stdout,
|
||||
stderr: IO=sys.stderr
|
||||
) -> Tuple[Callable, Callable]:
|
||||
stdout.write(f"Logging to {log_path}\n")
|
||||
log_file = Path(log_path).open("w", encoding="utf8")
|
||||
log_file.write("step\\t")
|
||||
log_file.write("score\\t")
|
||||
for pipe in nlp.pipe_names:
|
||||
log_file.write(f"loss_{pipe}\\t")
|
||||
log_file.write("\\n")
|
||||
|
||||
def log_step(info: Dict[str, Any]):
|
||||
with Path(log_path).open("a") as file_:
|
||||
file_.write(f"{info['step']}\\t")
|
||||
file_.write(f"{info['score']}\\t")
|
||||
def log_step(info: Optional[Dict[str, Any]]):
|
||||
if info:
|
||||
log_file.write(f"{info['step']}\\t")
|
||||
log_file.write(f"{info['score']}\\t")
|
||||
for pipe in nlp.pipe_names:
|
||||
file_.write(f"{info['losses'][pipe]}\\t")
|
||||
file_.write("\\n")
|
||||
log_file.write(f"{info['losses'][pipe]}\\t")
|
||||
log_file.write("\\n")
|
||||
|
||||
def finalize():
|
||||
pass
|
||||
log_file.close()
|
||||
|
||||
return log_step, finalize
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user