Add ConsoleLogger.v3

This addition expands the progress bar feature to count up the training/distillation steps to either the next evaluation pass or the maximum number of steps.
This commit is contained in:
shademe 2022-12-14 16:56:37 +01:00
parent e5c7f3b077
commit 93ba4e72fa
No known key found for this signature in database
GPG Key ID: 6FCA9FC635B2A402
3 changed files with 73 additions and 8 deletions

View File

@ -962,6 +962,7 @@ class Errors(metaclass=ErrorsWithCodes):
E1046 = ("{cls_name} is an abstract class and cannot be instantiated. If you are looking for spaCy's default "
"knowledge base, use `InMemoryLookupKB`.")
E1047 = ("`find_threshold()` only supports components with a `scorer` attribute.")
E1048 = ("Got '{unexpected}' as console progress bar type, but expected one of the following: {expected}")
# Deprecated model shortcuts, only used in errors and warnings

View File

@ -26,6 +26,8 @@ def setup_table(
return final_cols, final_widths, ["r" for _ in final_widths]
# We cannot rename this method as it's directly imported
# and used by external packages such as spacy-loggers.
@registry.loggers("spacy.ConsoleLogger.v2")
def console_logger(
progress_bar: bool = False,
@ -33,7 +35,27 @@ def console_logger(
output_file: Optional[Union[str, Path]] = None,
):
"""The ConsoleLogger.v2 prints out training logs in the console and/or saves them to a jsonl file.
progress_bar (bool): Whether the logger should print the progress bar.
progress_bar (bool): Whether the logger should print a progress bar tracking the steps till the next evaluation pass.
console_output (bool): Whether the logger should print the logs on the console.
output_file (Optional[Union[str, Path]]): The file to save the training logs to.
"""
return console_logger_v3(
progress_bar=None if progress_bar is False else "eval_steps",
console_output=console_output,
output_file=output_file,
)
@registry.loggers("spacy.ConsoleLogger.v3")
def console_logger_v3(
progress_bar: Optional[str] = None,
console_output: bool = True,
output_file: Optional[Union[str, Path]] = None,
):
"""The ConsoleLogger.v3 prints out training logs in the console and/or saves them to a jsonl file.
progress_bar (Optional[str]): Type of progress bar to show in the console. Allowed values:
all_steps - Tracks the number of steps until `training.max_steps` is reached.
eval_steps - Tracks the number of steps until `training.eval_frequency` is reached.
console_output (bool): Whether the logger should print the logs on the console.
output_file (Optional[Union[str, Path]]): The file to save the training logs to.
"""
@ -70,6 +92,7 @@ def console_logger(
for name, proc in nlp.pipeline
if hasattr(proc, "is_trainable") and proc.is_trainable
]
max_steps = nlp.config["training"]["max_steps"]
eval_frequency = nlp.config["training"]["eval_frequency"]
score_weights = nlp.config["training"]["score_weights"]
score_cols = [col for col, value in score_weights.items() if value is not None]
@ -84,6 +107,13 @@ def console_logger(
write(msg.row(table_header, widths=table_widths, spacing=spacing))
write(msg.row(["-" * width for width in table_widths], spacing=spacing))
progress = None
expected_progress_types = ("all_steps", "eval_steps", None)
if progress_bar not in expected_progress_types:
raise ValueError(
Errors.E1048.format(
unexpected=progress_bar, expected=expected_progress_types
)
)
def log_step(info: Optional[Dict[str, Any]]) -> None:
nonlocal progress
@ -142,10 +172,22 @@ def console_logger(
)
if progress_bar:
# Set disable=None, so that it disables on non-TTY
if progress_bar == "all_steps":
total = max_steps
desc = f"Last Eval Epoch: {info['epoch']}"
initial = info["step"]
else:
total = eval_frequency
desc = f"Epoch {info['epoch']+1}"
initial = 0
progress = tqdm.tqdm(
total=eval_frequency, disable=None, leave=False, file=stderr
total=total,
disable=None,
leave=False,
file=stderr,
initial=initial,
)
progress.set_description(f"Epoch {info['epoch']+1}")
progress.set_description(desc)
def finalize() -> None:
if output_stream:

View File

@ -564,11 +564,33 @@ start decreasing across epochs.
</Accordion>
| Name | Description |
| ---------------- | --------------------------------------------------------------------- |
| `progress_bar` | Whether the logger should print the progress bar ~~bool~~ |
| `console_output` | Whether the logger should print the logs on the console. ~~bool~~ |
| `output_file` | The file to save the training logs to. ~~Optional[Union[str, Path]]~~ |
| Name | Description |
| ---------------- | -------------------------------------------------------------------------------------------------------- |
| `progress_bar`   | Whether the logger should print a progress bar tracking the steps till the next evaluation pass. ~~bool~~ |
| `console_output` | Whether the logger should print the logs in the console. ~~bool~~ |
| `output_file` | The file to save the training logs to. ~~Optional[Union[str, Path]]~~ |
#### spacy.ConsoleLogger.v3 {#ConsoleLogger tag="registered function"}
> #### Example config
>
> ```ini
> [training.logger]
> @loggers = "spacy.ConsoleLogger.v3"
> progress_bar = "all_steps"
> console_output = true
> output_file = "training_log.jsonl"
> ```
Writes the results of a training step to the console in a tabular format and
saves them to a `jsonl` file.
| Name | Description |
| ---------------- | ---------------------------------------------------------------------------------------------------------------------------------- |
| `progress_bar` | Type of progress bar to show in the console: `all_steps` or `eval_steps`. They track the number of steps until `training.max_steps` and `training.eval_frequency` are reached respectively. If not set, no progress bar is shown. ~~Optional[str]~~ |
| `console_output` | Whether the logger should print the logs in the console. ~~bool~~ |
| `output_file` | The file to save the training logs to. ~~Optional[Union[str, Path]]~~ |
## Readers {#readers}