mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-10 19:57:17 +03:00
New console logger with expanded progress tracking (#11972)
* Add `ConsoleLogger.v3` This addition expands the progress bar feature to count up the training/distillation steps to either the next evaluation pass or the maximum number of steps. * Rename progress bar types * Add defaults to docs Minor fixes * Move comment * Minor punctuation fixes * Explicitly check for `None` when validating progress bar type Co-authored-by: Paul O'Leary McCann <polm@dampfkraft.com>
This commit is contained in:
parent
90896504a5
commit
aa2b471a6e
|
@ -962,6 +962,7 @@ class Errors(metaclass=ErrorsWithCodes):
|
|||
E1046 = ("{cls_name} is an abstract class and cannot be instantiated. If you are looking for spaCy's default "
|
||||
"knowledge base, use `InMemoryLookupKB`.")
|
||||
E1047 = ("`find_threshold()` only supports components with a `scorer` attribute.")
|
||||
E1048 = ("Got '{unexpected}' as console progress bar type, but expected one of the following: {expected}")
|
||||
|
||||
|
||||
# Deprecated model shortcuts, only used in errors and warnings
|
||||
|
|
|
@ -26,6 +26,8 @@ def setup_table(
|
|||
return final_cols, final_widths, ["r" for _ in final_widths]
|
||||
|
||||
|
||||
# We cannot rename this method as it's directly imported
|
||||
# and used by external packages such as spacy-loggers.
|
||||
@registry.loggers("spacy.ConsoleLogger.v2")
|
||||
def console_logger(
|
||||
progress_bar: bool = False,
|
||||
|
@ -33,7 +35,27 @@ def console_logger(
|
|||
output_file: Optional[Union[str, Path]] = None,
|
||||
):
|
||||
"""The ConsoleLogger.v2 prints out training logs in the console and/or saves them to a jsonl file.
|
||||
progress_bar (bool): Whether the logger should print the progress bar.
|
||||
progress_bar (bool): Whether the logger should print a progress bar tracking the steps till the next evaluation pass.
|
||||
console_output (bool): Whether the logger should print the logs on the console.
|
||||
output_file (Optional[Union[str, Path]]): The file to save the training logs to.
|
||||
"""
|
||||
return console_logger_v3(
|
||||
progress_bar=None if progress_bar is False else "eval",
|
||||
console_output=console_output,
|
||||
output_file=output_file,
|
||||
)
|
||||
|
||||
|
||||
@registry.loggers("spacy.ConsoleLogger.v3")
|
||||
def console_logger_v3(
|
||||
progress_bar: Optional[str] = None,
|
||||
console_output: bool = True,
|
||||
output_file: Optional[Union[str, Path]] = None,
|
||||
):
|
||||
"""The ConsoleLogger.v3 prints out training logs in the console and/or saves them to a jsonl file.
|
||||
progress_bar (Optional[str]): Type of progress bar to show in the console. Allowed values:
|
||||
train - Tracks the number of steps from the beginning of training until the full training run is complete (training.max_steps is reached).
|
||||
eval - Tracks the number of steps between the previous and next evaluation (training.eval_frequency is reached).
|
||||
console_output (bool): Whether the logger should print the logs on the console.
|
||||
output_file (Optional[Union[str, Path]]): The file to save the training logs to.
|
||||
"""
|
||||
|
@ -70,6 +92,7 @@ def console_logger(
|
|||
for name, proc in nlp.pipeline
|
||||
if hasattr(proc, "is_trainable") and proc.is_trainable
|
||||
]
|
||||
max_steps = nlp.config["training"]["max_steps"]
|
||||
eval_frequency = nlp.config["training"]["eval_frequency"]
|
||||
score_weights = nlp.config["training"]["score_weights"]
|
||||
score_cols = [col for col, value in score_weights.items() if value is not None]
|
||||
|
@ -84,6 +107,13 @@ def console_logger(
|
|||
write(msg.row(table_header, widths=table_widths, spacing=spacing))
|
||||
write(msg.row(["-" * width for width in table_widths], spacing=spacing))
|
||||
progress = None
|
||||
expected_progress_types = ("train", "eval")
|
||||
if progress_bar is not None and progress_bar not in expected_progress_types:
|
||||
raise ValueError(
|
||||
Errors.E1048.format(
|
||||
unexpected=progress_bar, expected=expected_progress_types
|
||||
)
|
||||
)
|
||||
|
||||
def log_step(info: Optional[Dict[str, Any]]) -> None:
|
||||
nonlocal progress
|
||||
|
@ -141,11 +171,23 @@ def console_logger(
|
|||
)
|
||||
)
|
||||
if progress_bar:
|
||||
if progress_bar == "train":
|
||||
total = max_steps
|
||||
desc = f"Last Eval Epoch: {info['epoch']}"
|
||||
initial = info["step"]
|
||||
else:
|
||||
total = eval_frequency
|
||||
desc = f"Epoch {info['epoch']+1}"
|
||||
initial = 0
|
||||
# Set disable=None, so that it disables on non-TTY
|
||||
progress = tqdm.tqdm(
|
||||
total=eval_frequency, disable=None, leave=False, file=stderr
|
||||
total=total,
|
||||
disable=None,
|
||||
leave=False,
|
||||
file=stderr,
|
||||
initial=initial,
|
||||
)
|
||||
progress.set_description(f"Epoch {info['epoch']+1}")
|
||||
progress.set_description(desc)
|
||||
|
||||
def finalize() -> None:
|
||||
if output_stream:
|
||||
|
|
|
@ -513,7 +513,7 @@ a [Weights & Biases](https://www.wandb.com/) dashboard.
|
|||
Instead of using one of the built-in loggers, you can
|
||||
[implement your own](/usage/training#custom-logging).
|
||||
|
||||
#### spacy.ConsoleLogger.v2 {#ConsoleLogger tag="registered function"}
|
||||
#### spacy.ConsoleLogger.v2 {tag="registered function"}
|
||||
|
||||
> #### Example config
|
||||
>
|
||||
|
@ -564,11 +564,33 @@ start decreasing across epochs.
|
|||
|
||||
</Accordion>
|
||||
|
||||
| Name | Description |
|
||||
| ---------------- | --------------------------------------------------------------------- |
|
||||
| `progress_bar` | Whether the logger should print the progress bar ~~bool~~ |
|
||||
| `console_output` | Whether the logger should print the logs on the console. ~~bool~~ |
|
||||
| `output_file` | The file to save the training logs to. ~~Optional[Union[str, Path]]~~ |
|
||||
| Name | Description |
|
||||
| ---------------- | ---------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `progress_bar` | Whether the logger should print a progress bar tracking the steps till the next evaluation pass (default: `False`). ~~bool~~ |
|
||||
| `console_output` | Whether the logger should print the logs in the console (default: `True`). ~~bool~~ |
|
||||
| `output_file` | The file to save the training logs to (default: `None`). ~~Optional[Union[str, Path]]~~ |
|
||||
|
||||
#### spacy.ConsoleLogger.v3 {#ConsoleLogger tag="registered function"}
|
||||
|
||||
> #### Example config
|
||||
>
|
||||
> ```ini
|
||||
> [training.logger]
|
||||
> @loggers = "spacy.ConsoleLogger.v3"
|
||||
> progress_bar = "eval"
|
||||
> console_output = true
|
||||
> output_file = "training_log.jsonl"
|
||||
> ```
|
||||
|
||||
Writes the results of a training step to the console in a tabular format and
|
||||
optionally saves them to a `jsonl` file.
|
||||
|
||||
| Name | Description |
|
||||
| ---------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `progress_bar` | Type of progress bar to show in the console: `"train"`, `"eval"` or `None`. |
|
||||
| | The bar tracks the number of steps until `training.max_steps` or `training.eval_frequency` is reached, respectively (default: `None`). ~~Optional[str]~~ |
|
||||
| `console_output` | Whether the logger should print the logs in the console (default: `True`). ~~bool~~ |
|
||||
| `output_file` | The file to save the training logs to (default: `None`). ~~Optional[Union[str, Path]]~~ |
|
||||
|
||||
## Readers {#readers}
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user