# spaCy/spacy/training/loggers.py

from typing import TYPE_CHECKING, Dict, Any, Tuple, Callable, List, Optional, IO
from wasabi import Printer
import tqdm
import sys

from ..util import registry
from .. import util
from ..errors import Errors

if TYPE_CHECKING:
    from ..language import Language  # noqa: F401


def setup_table(
    *, cols: List[str], widths: List[int], max_width: int = 13
) -> Tuple[List[str], List[int], List[str]]:
    """Prepare header labels, column widths and alignments for a table.

    Column names longer than max_width are cut to their first
    max_width - 3 characters followed by "...". Every label is upper-cased,
    and every width is the larger of the requested width and the length of
    the (possibly shortened) label. All columns are right-aligned ("r").

    cols and widths are paired positionally with zip, so surplus entries in
    the longer of the two are ignored.

    Returns a (labels, widths, aligns) tuple of equally long lists.
    """
    headers: List[str] = []
    col_widths: List[int] = []
    ellipsis = "..."
    for name, requested in zip(cols, widths):
        too_long = len(name) > max_width
        # Shorten over-long names so the header stays compact.
        label = name[: max_width - 3] + ellipsis if too_long else name
        headers.append(label.upper())
        col_widths.append(max(len(label), requested))
    aligns = ["r"] * len(col_widths)
    return headers, col_widths, aligns


@registry.loggers("spacy.ConsoleLogger.v1")
def console_logger(progress_bar: bool = False):
    """Create a logger setup function that prints training results as a table.

    progress_bar: If True, a tqdm progress bar is (re)started after every
        logged row. It is written to stderr, uses the configured
        eval_frequency as its total and advances by one each time log_step
        is called with None.

    Returns a setup function that takes the nlp object and optional
    stdout/stderr streams. Calling it writes the table header to stdout and
    returns a (log_step, finalize) pair of callbacks.
    """

    def setup_printer(
        nlp: "Language", stdout: IO = sys.stdout, stderr: IO = sys.stderr
    ) -> Tuple[Callable[[Optional[Dict[str, Any]]], None], Callable[[], None]]:
        def write(text: str) -> None:
            stdout.write(f"{text}\n")

        # The Printer is only used to format rows; the resulting strings are
        # sent to the given stdout stream through write().
        msg = Printer(no_print=True)
        # ensure that only trainable components are logged
        logged_pipes = [
            name
            for name, proc in nlp.pipeline
            if hasattr(proc, "is_trainable") and proc.is_trainable
        ]
        eval_frequency = nlp.config["training"]["eval_frequency"]
        score_weights = nlp.config["training"]["score_weights"]
        # Scores whose weight is None get no column in the table.
        score_cols = [col for col, value in score_weights.items() if value is not None]
        loss_cols = [f"Loss {pipe}" for pipe in logged_pipes]
        spacing = 2
        table_header, table_widths, table_aligns = setup_table(
            cols=["E", "#"] + loss_cols + score_cols + ["Score"],
            widths=[3, 6] + [8 for _ in loss_cols] + [6 for _ in score_cols] + [6],
        )
        write(msg.row(table_header, widths=table_widths, spacing=spacing))
        write(msg.row(["-" * width for width in table_widths], spacing=spacing))
        # Current tqdm bar, or None while no bar is active.
        progress = None

        def log_step(info: Optional[Dict[str, Any]]) -> None:
            """Write one table row for info, or advance the progress bar.

            info is None for steps without a new checkpoint. Otherwise it
            must provide the keys "epoch", "step", "score", "losses" and
            "other_scores".

            Raises ValueError (with message E916) if a score cannot be
            converted to a float.
            """
            nonlocal progress

            if info is None:
                # If we don't have a new checkpoint, just return.
                if progress is not None:
                    progress.update(1)
                return
            losses = [
                "{0:.2f}".format(float(info["losses"][pipe_name]))
                for pipe_name in logged_pipes
            ]

            scores = []
            for col in score_cols:
                score = info["other_scores"].get(col, 0.0)
                try:
                    score = float(score)
                except (TypeError, ValueError):
                    # float() raises TypeError for unsupported types (e.g.
                    # None or a dict) and ValueError for strings that are
                    # not numbers; report both with the same message.
                    err = Errors.E916.format(name=col, score_type=type(score))
                    raise ValueError(err) from None
                # Everything except "speed" is scaled by 100 (presumably to
                # show fractions as percentages).
                if col != "speed":
                    score *= 100
                scores.append("{0:.2f}".format(score))

            data = (
                [info["epoch"], info["step"]]
                + losses
                + scores
                + ["{0:.2f}".format(float(info["score"]))]
            )
            # Close the running bar before printing the row; a fresh bar is
            # started afterwards if progress bars are enabled.
            if progress is not None:
                progress.close()
            write(
                msg.row(data, widths=table_widths, aligns=table_aligns, spacing=spacing)
            )
            if progress_bar:
                # Set disable=None, so that it disables on non-TTY
                progress = tqdm.tqdm(
                    total=eval_frequency, disable=None, leave=False, file=stderr
                )
                progress.set_description(f"Epoch {info['epoch']+1}")

        def finalize() -> None:
            """Close the progress bar that is still open, if any."""
            nonlocal progress

            if progress is not None:
                progress.close()
                progress = None

        return log_step, finalize

    return setup_printer


@registry.loggers("spacy.WandbLogger.v1")
def wandb_logger(project_name: str, remove_config_values: List[str] = []):
    """Create a logger setup function that logs to the console and to wandb.

    project_name: Passed to wandb.init as the project.
    remove_config_values: Dot-notation config keys that are deleted from the
        interpolated config before it is passed to wandb.init. A key that is
        not present in the config raises KeyError.

    Returns a setup function that takes the nlp object and optional
    stdout/stderr streams, and returns a (log_step, finalize) pair of
    callbacks.
    """
    # NOTE: the default list above is shared between calls; it is only read
    # in this function, never mutated.
    # Imported inside the function, so wandb is only needed once this logger
    # is actually created.
    import wandb

    console = console_logger(progress_bar=False)

    def setup_logger(
        nlp: "Language", stdout: IO = sys.stdout, stderr: IO = sys.stderr
    ) -> Tuple[Callable[[Optional[Dict[str, Any]]], None], Callable[[], None]]:
        config = nlp.config.interpolate()
        # Flatten to dot notation so the requested keys can be deleted
        # directly, then restore the nested structure.
        config_dot = util.dict_to_dot(config)
        for field in remove_config_values:
            del config_dot[field]
        config = util.dot_to_dict(config_dot)
        wandb.init(project=project_name, config=config, reinit=True)
        console_log_step, console_finalize = console(nlp, stdout, stderr)

        def log_step(info: Optional[Dict[str, Any]]) -> None:
            """Forward info to the console logger and, if set, to wandb."""
            console_log_step(info)
            if info is not None:
                score = info["score"]
                other_scores = info["other_scores"]
                losses = info["losses"]
                wandb.log({"score": score})
                if losses:
                    wandb.log({f"loss_{k}": v for k, v in losses.items()})
                if isinstance(other_scores, dict):
                    wandb.log(other_scores)

        def finalize() -> None:
            """Finalize the console logger, then call wandb.join()."""
            console_finalize()
            wandb.join()

        return log_step, finalize

    return setup_logger
Improve control of training progress and logging (#6184) * Make logging and progress easier to control * Update docs * Cleanup errors * Fix ConfigValidationError * Pass stdout/stderr, not wasabi.Printer * Fix type * Upd logging example * Fix logger example * Fix type 2020-10-03 15:57:46 +03:00			`from typing import TYPE_CHECKING, Dict, Any, Tuple, Callable, List, Optional, IO`
Tidy up, auto-format, types 2020-10-03 17:31:58 +03:00			`from wasabi import Printer`
Improve control of training progress and logging (#6184) * Make logging and progress easier to control * Update docs * Cleanup errors * Fix ConfigValidationError * Pass stdout/stderr, not wasabi.Printer * Fix type * Upd logging example * Fix logger example * Fix type 2020-10-03 15:57:46 +03:00			`import tqdm`
			`import sys`
Weights & Biases logger for train CLI (#5971) * quick test as part of train script * train_logger in config, default ConsoleLogger in loggers catalogue * entitiy typo * add wandb_logger * cleanup * Update spacy/cli/train_logger.py Co-authored-by: Ines Montani <ines@ines.io> * move loggers to gold.loggers Co-authored-by: Ines Montani <ines@ines.io> 2020-08-26 16:24:33 +03:00
			`from ..util import registry`
add disable_fields to wandb_logger 2020-08-28 14:55:32 +03:00			`from .. import util`
Weights & Biases logger for train CLI (#5971) * quick test as part of train script * train_logger in config, default ConsoleLogger in loggers catalogue * entitiy typo * add wandb_logger * cleanup * Update spacy/cli/train_logger.py Co-authored-by: Ines Montani <ines@ines.io> * move loggers to gold.loggers Co-authored-by: Ines Montani <ines@ines.io> 2020-08-26 16:24:33 +03:00			`from ..errors import Errors`

Tidy up, auto-format, types 2020-10-03 17:31:58 +03:00			`if TYPE_CHECKING:`
			`from ..language import Language # noqa: F401`

Weights & Biases logger for train CLI (#5971) * quick test as part of train script * train_logger in config, default ConsoleLogger in loggers catalogue * entitiy typo * add wandb_logger * cleanup * Update spacy/cli/train_logger.py Co-authored-by: Ines Montani <ines@ines.io> * move loggers to gold.loggers Co-authored-by: Ines Montani <ines@ines.io> 2020-08-26 16:24:33 +03:00
Make console logger table more compact 2020-10-11 13:55:46 +03:00			`def setup_table(`
			`*, cols: List[str], widths: List[int], max_width: int = 13`
			`) -> Tuple[List[str], List[int], List[str]]:`
			`final_cols = []`
			`final_widths = []`
			`for col, width in zip(cols, widths):`
			`if len(col) > max_width:`
			`col = col[: max_width - 3] + "..." # shorten column if too long`
			`final_cols.append(col.upper())`
			`final_widths.append(max(len(col), width))`
			`return final_cols, final_widths, ["r" for _ in final_widths]`


Weights & Biases logger for train CLI (#5971) * quick test as part of train script * train_logger in config, default ConsoleLogger in loggers catalogue * entitiy typo * add wandb_logger * cleanup * Update spacy/cli/train_logger.py Co-authored-by: Ines Montani <ines@ines.io> * move loggers to gold.loggers Co-authored-by: Ines Montani <ines@ines.io> 2020-08-26 16:24:33 +03:00			`@registry.loggers("spacy.ConsoleLogger.v1")`
Tidy up, auto-format, types 2020-10-03 17:31:58 +03:00			`def console_logger(progress_bar: bool = False):`
Weights & Biases logger for train CLI (#5971) * quick test as part of train script * train_logger in config, default ConsoleLogger in loggers catalogue * entitiy typo * add wandb_logger * cleanup * Update spacy/cli/train_logger.py Co-authored-by: Ines Montani <ines@ines.io> * move loggers to gold.loggers Co-authored-by: Ines Montani <ines@ines.io> 2020-08-26 16:24:33 +03:00			`def setup_printer(`
Tidy up, auto-format, types 2020-10-03 17:31:58 +03:00			`nlp: "Language", stdout: IO = sys.stdout, stderr: IO = sys.stderr`
			`) -> Tuple[Callable[[Optional[Dict[str, Any]]], None], Callable[[], None]]:`
Make console logger table more compact 2020-10-11 13:55:46 +03:00			`write = lambda text: stdout.write(f"{text}\n")`
Tidy up, auto-format, types 2020-10-03 17:31:58 +03:00			`msg = Printer(no_print=True)`
is_trainable method 2020-10-05 18:43:42 +03:00			`# ensure that only trainable components are logged`
			`logged_pipes = [`
			`name`
			`for name, proc in nlp.pipeline`
TrainablePipe (#6213) * rename Pipe to TrainablePipe * split functionality between Pipe and TrainablePipe * remove unnecessary methods from certain components * cleanup * hasattr(component, "pipe") should be sufficient again * remove serialization and vocab/cfg from Pipe * unify _ensure_examples and validate_examples * small fixes * hasattr checks for self.cfg and self.vocab * make is_resizable and is_trainable properties * serialize strings.json instead of vocab * fix KB IO + tests * fix typos * more typos * _added_strings as a set * few more tests specifically for _added_strings field * bump to 3.0.0a36 2020-10-08 22:33:49 +03:00			`if hasattr(proc, "is_trainable") and proc.is_trainable`
is_trainable method 2020-10-05 18:43:42 +03:00			`]`
Improve control of training progress and logging (#6184) * Make logging and progress easier to control * Update docs * Cleanup errors * Fix ConfigValidationError * Pass stdout/stderr, not wasabi.Printer * Fix type * Upd logging example * Fix logger example * Fix type 2020-10-03 15:57:46 +03:00			`eval_frequency = nlp.config["training"]["eval_frequency"]`
Fix logging 2020-09-24 12:04:35 +03:00			`score_weights = nlp.config["training"]["score_weights"]`
			`score_cols = [col for col, value in score_weights.items() if value is not None]`
avoid logging performance of frozen components 2020-09-23 11:37:12 +03:00			`loss_cols = [f"Loss {pipe}" for pipe in logged_pipes]`
Make console logger table more compact 2020-10-11 13:55:46 +03:00			`spacing = 2`
			`table_header, table_widths, table_aligns = setup_table(`
			`cols=["E", "#"] + loss_cols + score_cols + ["Score"],`
			`widths=[3, 6] + [8 for _ in loss_cols] + [6 for _ in score_cols] + [6],`
			`)`
			`write(msg.row(table_header, widths=table_widths, spacing=spacing))`
			`write(msg.row(["-" * width for width in table_widths], spacing=spacing))`
Improve control of training progress and logging (#6184) * Make logging and progress easier to control * Update docs * Cleanup errors * Fix ConfigValidationError * Pass stdout/stderr, not wasabi.Printer * Fix type * Upd logging example * Fix logger example * Fix type 2020-10-03 15:57:46 +03:00			`progress = None`

Tidy up, auto-format, types 2020-10-03 17:31:58 +03:00			`def log_step(info: Optional[Dict[str, Any]]) -> None:`
Improve control of training progress and logging (#6184) * Make logging and progress easier to control * Update docs * Cleanup errors * Fix ConfigValidationError * Pass stdout/stderr, not wasabi.Printer * Fix type * Upd logging example * Fix logger example * Fix type 2020-10-03 15:57:46 +03:00			`nonlocal progress`
Weights & Biases logger for train CLI (#5971) * quick test as part of train script * train_logger in config, default ConsoleLogger in loggers catalogue * entitiy typo * add wandb_logger * cleanup * Update spacy/cli/train_logger.py Co-authored-by: Ines Montani <ines@ines.io> * move loggers to gold.loggers Co-authored-by: Ines Montani <ines@ines.io> 2020-08-26 16:24:33 +03:00
Improve control of training progress and logging (#6184) * Make logging and progress easier to control * Update docs * Cleanup errors * Fix ConfigValidationError * Pass stdout/stderr, not wasabi.Printer * Fix type * Upd logging example * Fix logger example * Fix type 2020-10-03 15:57:46 +03:00			`if info is None:`
			`# If we don't have a new checkpoint, just return.`
			`if progress is not None:`
			`progress.update(1)`
Tidy up, auto-format, types 2020-10-03 17:31:58 +03:00			`return`
prevent loss keyerror for non-trainable components 2020-10-05 17:33:28 +03:00			`losses = [`
			`"{0:.2f}".format(float(info["losses"][pipe_name]))`
is_trainable method 2020-10-05 18:43:42 +03:00			`for pipe_name in logged_pipes`
prevent loss keyerror for non-trainable components 2020-10-05 17:33:28 +03:00			`]`
Improve control of training progress and logging (#6184) * Make logging and progress easier to control * Update docs * Cleanup errors * Fix ConfigValidationError * Pass stdout/stderr, not wasabi.Printer * Fix type * Upd logging example * Fix logger example * Fix type 2020-10-03 15:57:46 +03:00
Fix speed report in table 2020-09-13 18:39:31 +03:00			`scores = []`
			`for col in score_cols:`
Fix logging 2020-09-24 12:04:35 +03:00			`score = info["other_scores"].get(col, 0.0)`
			`try:`
			`score = float(score)`
			`except TypeError:`
			`err = Errors.E916.format(name=col, score_type=type(score))`
Improve error handling around non-number scores 2020-09-24 12:29:07 +03:00			`raise ValueError(err) from None`
Improve control of training progress and logging (#6184) * Make logging and progress easier to control * Update docs * Cleanup errors * Fix ConfigValidationError * Pass stdout/stderr, not wasabi.Printer * Fix type * Upd logging example * Fix logger example * Fix type 2020-10-03 15:57:46 +03:00			`if col != "speed":`
			`score *= 100`
			`scores.append("{0:.2f}".format(score))`

Weights & Biases logger for train CLI (#5971) * quick test as part of train script * train_logger in config, default ConsoleLogger in loggers catalogue * entitiy typo * add wandb_logger * cleanup * Update spacy/cli/train_logger.py Co-authored-by: Ines Montani <ines@ines.io> * move loggers to gold.loggers Co-authored-by: Ines Montani <ines@ines.io> 2020-08-26 16:24:33 +03:00			`data = (`
			`[info["epoch"], info["step"]]`
			`+ losses`
			`+ scores`
			`+ ["{0:.2f}".format(float(info["score"]))]`
			`)`
Improve control of training progress and logging (#6184) * Make logging and progress easier to control * Update docs * Cleanup errors * Fix ConfigValidationError * Pass stdout/stderr, not wasabi.Printer * Fix type * Upd logging example * Fix logger example * Fix type 2020-10-03 15:57:46 +03:00			`if progress is not None:`
			`progress.close()`
Make console logger table more compact 2020-10-11 13:55:46 +03:00			`write(`
			`msg.row(data, widths=table_widths, aligns=table_aligns, spacing=spacing)`
			`)`
Improve control of training progress and logging (#6184) * Make logging and progress easier to control * Update docs * Cleanup errors * Fix ConfigValidationError * Pass stdout/stderr, not wasabi.Printer * Fix type * Upd logging example * Fix logger example * Fix type 2020-10-03 15:57:46 +03:00			`if progress_bar:`
			`# Set disable=None, so that it disables on non-TTY`
			`progress = tqdm.tqdm(`
Tidy up, auto-format, types 2020-10-03 17:31:58 +03:00			`total=eval_frequency, disable=None, leave=False, file=stderr`
Improve control of training progress and logging (#6184) * Make logging and progress easier to control * Update docs * Cleanup errors * Fix ConfigValidationError * Pass stdout/stderr, not wasabi.Printer * Fix type * Upd logging example * Fix logger example * Fix type 2020-10-03 15:57:46 +03:00			`)`
			`progress.set_description(f"Epoch {info['epoch']+1}")`
Weights & Biases logger for train CLI (#5971) * quick test as part of train script * train_logger in config, default ConsoleLogger in loggers catalogue * entitiy typo * add wandb_logger * cleanup * Update spacy/cli/train_logger.py Co-authored-by: Ines Montani <ines@ines.io> * move loggers to gold.loggers Co-authored-by: Ines Montani <ines@ines.io> 2020-08-26 16:24:33 +03:00
Tidy up, auto-format, types 2020-10-03 17:31:58 +03:00			`def finalize() -> None:`
Weights & Biases logger for train CLI (#5971) * quick test as part of train script * train_logger in config, default ConsoleLogger in loggers catalogue * entitiy typo * add wandb_logger * cleanup * Update spacy/cli/train_logger.py Co-authored-by: Ines Montani <ines@ines.io> * move loggers to gold.loggers Co-authored-by: Ines Montani <ines@ines.io> 2020-08-26 16:24:33 +03:00			`pass`

			`return log_step, finalize`

			`return setup_printer`


			`@registry.loggers("spacy.WandbLogger.v1")`
fix type 2020-08-28 15:08:33 +03:00			`def wandb_logger(project_name: str, remove_config_values: List[str] = []):`
Weights & Biases logger for train CLI (#5971) * quick test as part of train script * train_logger in config, default ConsoleLogger in loggers catalogue * entitiy typo * add wandb_logger * cleanup * Update spacy/cli/train_logger.py Co-authored-by: Ines Montani <ines@ines.io> * move loggers to gold.loggers Co-authored-by: Ines Montani <ines@ines.io> 2020-08-26 16:24:33 +03:00			`import wandb`

Improve control of training progress and logging (#6184) * Make logging and progress easier to control * Update docs * Cleanup errors * Fix ConfigValidationError * Pass stdout/stderr, not wasabi.Printer * Fix type * Upd logging example * Fix logger example * Fix type 2020-10-03 15:57:46 +03:00			`console = console_logger(progress_bar=False)`
Weights & Biases logger for train CLI (#5971) * quick test as part of train script * train_logger in config, default ConsoleLogger in loggers catalogue * entitiy typo * add wandb_logger * cleanup * Update spacy/cli/train_logger.py Co-authored-by: Ines Montani <ines@ines.io> * move loggers to gold.loggers Co-authored-by: Ines Montani <ines@ines.io> 2020-08-26 16:24:33 +03:00
			`def setup_logger(`
Tidy up, auto-format, types 2020-10-03 17:31:58 +03:00			`nlp: "Language", stdout: IO = sys.stdout, stderr: IO = sys.stderr`
			`) -> Tuple[Callable[[Dict[str, Any]], None], Callable[[], None]]:`
Weights & Biases logger for train CLI (#5971) * quick test as part of train script * train_logger in config, default ConsoleLogger in loggers catalogue * entitiy typo * add wandb_logger * cleanup * Update spacy/cli/train_logger.py Co-authored-by: Ines Montani <ines@ines.io> * move loggers to gold.loggers Co-authored-by: Ines Montani <ines@ines.io> 2020-08-26 16:24:33 +03:00			`config = nlp.config.interpolate()`
add disable_fields to wandb_logger 2020-08-28 14:55:32 +03:00			`config_dot = util.dict_to_dot(config)`
rename field 2020-08-28 15:06:23 +03:00			`for field in remove_config_values:`
add disable_fields to wandb_logger 2020-08-28 14:55:32 +03:00			`del config_dot[field]`
			`config = util.dot_to_dict(config_dot)`
fix wandb logger when calling multiple times from same script 2020-09-15 13:56:33 +03:00			`wandb.init(project=project_name, config=config, reinit=True)`
Improve control of training progress and logging (#6184) * Make logging and progress easier to control * Update docs * Cleanup errors * Fix ConfigValidationError * Pass stdout/stderr, not wasabi.Printer * Fix type * Upd logging example * Fix logger example * Fix type 2020-10-03 15:57:46 +03:00			`console_log_step, console_finalize = console(nlp, stdout, stderr)`
Weights & Biases logger for train CLI (#5971) * quick test as part of train script * train_logger in config, default ConsoleLogger in loggers catalogue * entitiy typo * add wandb_logger * cleanup * Update spacy/cli/train_logger.py Co-authored-by: Ines Montani <ines@ines.io> * move loggers to gold.loggers Co-authored-by: Ines Montani <ines@ines.io> 2020-08-26 16:24:33 +03:00
Improve control of training progress and logging (#6184) * Make logging and progress easier to control * Update docs * Cleanup errors * Fix ConfigValidationError * Pass stdout/stderr, not wasabi.Printer * Fix type * Upd logging example * Fix logger example * Fix type 2020-10-03 15:57:46 +03:00			`def log_step(info: Optional[Dict[str, Any]]):`
Weights & Biases logger for train CLI (#5971) * quick test as part of train script * train_logger in config, default ConsoleLogger in loggers catalogue * entitiy typo * add wandb_logger * cleanup * Update spacy/cli/train_logger.py Co-authored-by: Ines Montani <ines@ines.io> * move loggers to gold.loggers Co-authored-by: Ines Montani <ines@ines.io> 2020-08-26 16:24:33 +03:00			`console_log_step(info)`
Improve control of training progress and logging (#6184) * Make logging and progress easier to control * Update docs * Cleanup errors * Fix ConfigValidationError * Pass stdout/stderr, not wasabi.Printer * Fix type * Upd logging example * Fix logger example * Fix type 2020-10-03 15:57:46 +03:00			`if info is not None:`
			`score = info["score"]`
			`other_scores = info["other_scores"]`
			`losses = info["losses"]`
			`wandb.log({"score": score})`
			`if losses:`
			`wandb.log({f"loss_{k}": v for k, v in losses.items()})`
			`if isinstance(other_scores, dict):`
			`wandb.log(other_scores)`
Weights & Biases logger for train CLI (#5971) * quick test as part of train script * train_logger in config, default ConsoleLogger in loggers catalogue * entitiy typo * add wandb_logger * cleanup * Update spacy/cli/train_logger.py Co-authored-by: Ines Montani <ines@ines.io> * move loggers to gold.loggers Co-authored-by: Ines Montani <ines@ines.io> 2020-08-26 16:24:33 +03:00
Tidy up, auto-format, types 2020-10-03 17:31:58 +03:00			`def finalize() -> None:`
Weights & Biases logger for train CLI (#5971) * quick test as part of train script * train_logger in config, default ConsoleLogger in loggers catalogue * entitiy typo * add wandb_logger * cleanup * Update spacy/cli/train_logger.py Co-authored-by: Ines Montani <ines@ines.io> * move loggers to gold.loggers Co-authored-by: Ines Montani <ines@ines.io> 2020-08-26 16:24:33 +03:00			`console_finalize()`
fix wandb logger when calling multiple times from same script 2020-09-15 13:56:33 +03:00			`wandb.join()`
Weights & Biases logger for train CLI (#5971) * quick test as part of train script * train_logger in config, default ConsoleLogger in loggers catalogue * entitiy typo * add wandb_logger * cleanup * Update spacy/cli/train_logger.py Co-authored-by: Ines Montani <ines@ines.io> * move loggers to gold.loggers Co-authored-by: Ines Montani <ines@ines.io> 2020-08-26 16:24:33 +03:00
			`return log_step, finalize`

			`return setup_logger`