Add ConsoleLogger.v2 (#11214)

* Init

* Change logger to ConsoleLogger.v2

* adjust naming

* More naming adjustments

* Fix output_file reference error

* ignore type

* Add basic test for logger

* Hopefully fix mypy issue

* mypy ignore line

* Update mypy line

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>

* Update test method name

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>

* Change file saving logic

* Fix finalize method

* increase spacy-legacy version in requirements

* Update docs

* small adjustments

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
This commit is contained in:
Edward 2022-08-29 10:23:05 +02:00 committed by GitHub
parent ba33200979
commit 6723d76f24
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 198 additions and 48 deletions

View File

@ -1,5 +1,5 @@
# Our libraries # Our libraries
spacy-legacy>=3.0.9,<3.1.0 spacy-legacy>=3.0.10,<3.1.0
spacy-loggers>=1.0.0,<2.0.0 spacy-loggers>=1.0.0,<2.0.0
cymem>=2.0.2,<2.1.0 cymem>=2.0.2,<2.1.0
preshed>=3.0.2,<3.1.0 preshed>=3.0.2,<3.1.0

View File

@ -41,7 +41,7 @@ setup_requires =
thinc>=8.1.0,<8.2.0 thinc>=8.1.0,<8.2.0
install_requires = install_requires =
# Our libraries # Our libraries
spacy-legacy>=3.0.9,<3.1.0 spacy-legacy>=3.0.10,<3.1.0
spacy-loggers>=1.0.0,<2.0.0 spacy-loggers>=1.0.0,<2.0.0
murmurhash>=0.28.0,<1.1.0 murmurhash>=0.28.0,<1.1.0
cymem>=2.0.2,<2.1.0 cymem>=2.0.2,<2.1.0

View File

@ -0,0 +1,30 @@
import pytest
import spacy
from spacy.training import loggers
@pytest.fixture()
def nlp():
    """Provide a blank English pipeline that has an ``ner`` component added."""
    pipeline = spacy.blank("en")
    pipeline.add_pipe("ner")
    return pipeline
@pytest.fixture()
def info():
    """Provide a sample training-step payload to pass to the logger's ``log_step``."""
    # Built piecewise; key order matches the original literal.
    ner_losses = {"ner": 100}
    entity_scores = {"ENTS_F": 0.85, "ENTS_P": 0.90, "ENTS_R": 0.80}
    return dict(
        losses=ner_losses,
        other_scores=entity_scores,
        epoch=100,
        step=125,
        score=85,
    )
def test_console_logger(nlp, info):
    """Smoke-test ConsoleLogger.v2: build the logger, log one step, then finalize.

    Runs with the progress bar and console output enabled and ``output_file``
    set to None, so no log file is opened.
    """
    console_logger = loggers.console_logger(
        progress_bar=True, console_output=True, output_file=None
    )
    log_step, finalize = console_logger(nlp)
    log_step(info)
    # Exercise the teardown callback as well; previously it was unpacked but
    # never called, leaving the finalize path untested.
    finalize()

View File

@ -1,10 +1,13 @@
from typing import TYPE_CHECKING, Dict, Any, Tuple, Callable, List, Optional, IO from typing import TYPE_CHECKING, Dict, Any, Tuple, Callable, List, Optional, IO, Union
from wasabi import Printer from wasabi import Printer
from pathlib import Path
import tqdm import tqdm
import sys import sys
import srsly
from ..util import registry from ..util import registry
from ..errors import Errors from ..errors import Errors
from .. import util
if TYPE_CHECKING: if TYPE_CHECKING:
from ..language import Language # noqa: F401 from ..language import Language # noqa: F401
@ -23,13 +26,44 @@ def setup_table(
return final_cols, final_widths, ["r" for _ in final_widths] return final_cols, final_widths, ["r" for _ in final_widths]
@registry.loggers("spacy.ConsoleLogger.v1") @registry.loggers("spacy.ConsoleLogger.v2")
def console_logger(progress_bar: bool = False): def console_logger(
progress_bar: bool = False,
console_output: bool = True,
output_file: Optional[Union[str, Path]] = None,
):
"""The ConsoleLogger.v2 prints out training logs in the console and/or saves them to a jsonl file.
progress_bar (bool): Whether the logger should print the progress bar.
console_output (bool): Whether the logger should print the logs on the console.
output_file (Optional[Union[str, Path]]): The file to save the training logs to.
"""
_log_exist = False
if output_file:
output_file = util.ensure_path(output_file) # type: ignore
if output_file.exists(): # type: ignore
_log_exist = True
if not output_file.parents[0].exists(): # type: ignore
output_file.parents[0].mkdir(parents=True) # type: ignore
def setup_printer( def setup_printer(
nlp: "Language", stdout: IO = sys.stdout, stderr: IO = sys.stderr nlp: "Language", stdout: IO = sys.stdout, stderr: IO = sys.stderr
) -> Tuple[Callable[[Optional[Dict[str, Any]]], None], Callable[[], None]]: ) -> Tuple[Callable[[Optional[Dict[str, Any]]], None], Callable[[], None]]:
write = lambda text: print(text, file=stdout, flush=True) write = lambda text: print(text, file=stdout, flush=True)
msg = Printer(no_print=True) msg = Printer(no_print=True)
nonlocal output_file
output_stream = None
if _log_exist:
write(
msg.warn(
f"Saving logs is disabled because {output_file} already exists."
)
)
output_file = None
elif output_file:
write(msg.info(f"Saving results to {output_file}"))
output_stream = open(output_file, "w", encoding="utf-8")
# ensure that only trainable components are logged # ensure that only trainable components are logged
logged_pipes = [ logged_pipes = [
name name
@ -40,6 +74,8 @@ def console_logger(progress_bar: bool = False):
score_weights = nlp.config["training"]["score_weights"] score_weights = nlp.config["training"]["score_weights"]
score_cols = [col for col, value in score_weights.items() if value is not None] score_cols = [col for col, value in score_weights.items() if value is not None]
loss_cols = [f"Loss {pipe}" for pipe in logged_pipes] loss_cols = [f"Loss {pipe}" for pipe in logged_pipes]
if console_output:
spacing = 2 spacing = 2
table_header, table_widths, table_aligns = setup_table( table_header, table_widths, table_aligns = setup_table(
cols=["E", "#"] + loss_cols + score_cols + ["Score"], cols=["E", "#"] + loss_cols + score_cols + ["Score"],
@ -57,12 +93,15 @@ def console_logger(progress_bar: bool = False):
if progress is not None: if progress is not None:
progress.update(1) progress.update(1)
return return
losses = [
"{0:.2f}".format(float(info["losses"][pipe_name])) losses = []
for pipe_name in logged_pipes log_losses = {}
] for pipe_name in logged_pipes:
losses.append("{0:.2f}".format(float(info["losses"][pipe_name])))
log_losses[pipe_name] = float(info["losses"][pipe_name])
scores = [] scores = []
log_scores = {}
for col in score_cols: for col in score_cols:
score = info["other_scores"].get(col, 0.0) score = info["other_scores"].get(col, 0.0)
try: try:
@ -73,6 +112,7 @@ def console_logger(progress_bar: bool = False):
if col != "speed": if col != "speed":
score *= 100 score *= 100
scores.append("{0:.2f}".format(score)) scores.append("{0:.2f}".format(score))
log_scores[str(col)] = score
data = ( data = (
[info["epoch"], info["step"]] [info["epoch"], info["step"]]
@ -80,10 +120,25 @@ def console_logger(progress_bar: bool = False):
+ scores + scores
+ ["{0:.2f}".format(float(info["score"]))] + ["{0:.2f}".format(float(info["score"]))]
) )
if output_stream:
# Write to log file per log_step
log_data = {
"epoch": info["epoch"],
"step": info["step"],
"losses": log_losses,
"scores": log_scores,
"score": float(info["score"]),
}
output_stream.write(srsly.json_dumps(log_data) + "\n")
if progress is not None: if progress is not None:
progress.close() progress.close()
if console_output:
write( write(
msg.row(data, widths=table_widths, aligns=table_aligns, spacing=spacing) msg.row(
data, widths=table_widths, aligns=table_aligns, spacing=spacing
)
) )
if progress_bar: if progress_bar:
# Set disable=None, so that it disables on non-TTY # Set disable=None, so that it disables on non-TTY
@ -93,7 +148,8 @@ def console_logger(progress_bar: bool = False):
progress.set_description(f"Epoch {info['epoch']+1}") progress.set_description(f"Epoch {info['epoch']+1}")
def finalize() -> None: def finalize() -> None:
pass if output_stream:
output_stream.close()
return log_step, finalize return log_step, finalize

View File

@ -248,6 +248,59 @@ added to an existing vectors table. See more details in
## Loggers {#loggers} ## Loggers {#loggers}
These functions are available from `@spacy.registry.loggers`.
### spacy.ConsoleLogger.v1 {#ConsoleLogger_v1}
> #### Example config
>
> ```ini
> [training.logger]
> @loggers = "spacy.ConsoleLogger.v1"
> progress_bar = true
> ```
Writes the results of a training step to the console in a tabular format.
<Accordion title="Example console output" spaced>
```cli
$ python -m spacy train config.cfg
```
```
Using CPU
Loading config and nlp from: config.cfg
Pipeline: ['tok2vec', 'tagger']
Start training
Training. Initial learn rate: 0.0
E # LOSS TOK2VEC LOSS TAGGER TAG_ACC SCORE
--- ------ ------------ ----------- ------- ------
0 0 0.00 86.20 0.22 0.00
0 200 3.08 18968.78 34.00 0.34
0 400 31.81 22539.06 33.64 0.34
0 600 92.13 22794.91 43.80 0.44
0 800 183.62 21541.39 56.05 0.56
0 1000 352.49 25461.82 65.15 0.65
0 1200 422.87 23708.82 71.84 0.72
0 1400 601.92 24994.79 76.57 0.77
0 1600 662.57 22268.02 80.20 0.80
0 1800 1101.50 28413.77 82.56 0.83
0 2000 1253.43 28736.36 85.00 0.85
0 2200 1411.02 28237.53 87.42 0.87
0 2400 1605.35 28439.95 88.70 0.89
```
Note that the cumulative loss keeps increasing within one epoch, but should
start decreasing across epochs.
</Accordion>
| Name | Description |
| -------------- | --------------------------------------------------------- |
| `progress_bar` | Whether the logger should print the progress bar ~~bool~~ |
Logging utilities for spaCy are implemented in the Logging utilities for spaCy are implemented in the
[`spacy-loggers`](https://github.com/explosion/spacy-loggers) repo, and the [`spacy-loggers`](https://github.com/explosion/spacy-loggers) repo, and the
functions are typically available from `@spacy.registry.loggers`. functions are typically available from `@spacy.registry.loggers`.

View File

@ -275,8 +275,8 @@ Render a dependency parse tree or named entity visualization.
### displacy.parse_deps {#displacy.parse_deps tag="method" new="2"} ### displacy.parse_deps {#displacy.parse_deps tag="method" new="2"}
Generate dependency parse in `{'words': [], 'arcs': []}` format. Generate dependency parse in `{'words': [], 'arcs': []}` format. For use with
For use with the `manual=True` argument in `displacy.render`. the `manual=True` argument in `displacy.render`.
> #### Example > #### Example
> >
@ -297,8 +297,8 @@ For use with the `manual=True` argument in `displacy.render`.
### displacy.parse_ents {#displacy.parse_ents tag="method" new="2"} ### displacy.parse_ents {#displacy.parse_ents tag="method" new="2"}
Generate named entities in `[{start: i, end: i, label: 'label'}]` format. Generate named entities in `[{start: i, end: i, label: 'label'}]` format. For
For use with the `manual=True` argument in `displacy.render`. use with the `manual=True` argument in `displacy.render`.
> #### Example > #### Example
> >
@ -319,8 +319,8 @@ For use with the `manual=True` argument in `displacy.render`.
### displacy.parse_spans {#displacy.parse_spans tag="method" new="2"} ### displacy.parse_spans {#displacy.parse_spans tag="method" new="2"}
Generate spans in `[{start_token: i, end_token: i, label: 'label'}]` format. Generate spans in `[{start_token: i, end_token: i, label: 'label'}]` format. For
For use with the `manual=True` argument in `displacy.render`. use with the `manual=True` argument in `displacy.render`.
> #### Example > #### Example
> >
@ -505,7 +505,7 @@ finished. To log each training step, a
and the accuracy scores on the development set. and the accuracy scores on the development set.
The built-in, default logger is the ConsoleLogger, which prints results to the The built-in, default logger is the ConsoleLogger, which prints results to the
console in tabular format. The console in tabular format and optionally saves them to a `jsonl` file. The
[spacy-loggers](https://github.com/explosion/spacy-loggers) package, included as [spacy-loggers](https://github.com/explosion/spacy-loggers) package, included as
a dependency of spaCy, enables other loggers, such as one that sends results to a dependency of spaCy, enables other loggers, such as one that sends results to
a [Weights & Biases](https://www.wandb.com/) dashboard. a [Weights & Biases](https://www.wandb.com/) dashboard.
@ -513,16 +513,20 @@ a [Weights & Biases](https://www.wandb.com/) dashboard.
Instead of using one of the built-in loggers, you can Instead of using one of the built-in loggers, you can
[implement your own](/usage/training#custom-logging). [implement your own](/usage/training#custom-logging).
#### spacy.ConsoleLogger.v1 {#ConsoleLogger tag="registered function"} #### spacy.ConsoleLogger.v2 {#ConsoleLogger tag="registered function"}
> #### Example config > #### Example config
> >
> ```ini > ```ini
> [training.logger] > [training.logger]
> @loggers = "spacy.ConsoleLogger.v1" > @loggers = "spacy.ConsoleLogger.v2"
> progress_bar = true
> console_output = true
> output_file = "training_log.jsonl"
> ``` > ```
Writes the results of a training step to the console in a tabular format. Writes the results of a training step to the console in a tabular format and
optionally saves them to a `jsonl` file.
<Accordion title="Example console output" spaced> <Accordion title="Example console output" spaced>
@ -536,22 +540,23 @@ $ python -m spacy train config.cfg
Pipeline: ['tok2vec', 'tagger'] Pipeline: ['tok2vec', 'tagger']
Start training Start training
Training. Initial learn rate: 0.0 Training. Initial learn rate: 0.0
Saving results to training_log.jsonl
E # LOSS TOK2VEC LOSS TAGGER TAG_ACC SCORE E # LOSS TOK2VEC LOSS TAGGER TAG_ACC SCORE
--- ------ ------------ ----------- ------- ------ --- ------ ------------ ----------- ------- ------
1 0 0.00 86.20 0.22 0.00 0 0 0.00 86.20 0.22 0.00
1 200 3.08 18968.78 34.00 0.34 0 200 3.08 18968.78 34.00 0.34
1 400 31.81 22539.06 33.64 0.34 0 400 31.81 22539.06 33.64 0.34
1 600 92.13 22794.91 43.80 0.44 0 600 92.13 22794.91 43.80 0.44
1 800 183.62 21541.39 56.05 0.56 0 800 183.62 21541.39 56.05 0.56
1 1000 352.49 25461.82 65.15 0.65 0 1000 352.49 25461.82 65.15 0.65
1 1200 422.87 23708.82 71.84 0.72 0 1200 422.87 23708.82 71.84 0.72
1 1400 601.92 24994.79 76.57 0.77 0 1400 601.92 24994.79 76.57 0.77
1 1600 662.57 22268.02 80.20 0.80 0 1600 662.57 22268.02 80.20 0.80
1 1800 1101.50 28413.77 82.56 0.83 0 1800 1101.50 28413.77 82.56 0.83
1 2000 1253.43 28736.36 85.00 0.85 0 2000 1253.43 28736.36 85.00 0.85
1 2200 1411.02 28237.53 87.42 0.87 0 2200 1411.02 28237.53 87.42 0.87
1 2400 1605.35 28439.95 88.70 0.89 0 2400 1605.35 28439.95 88.70 0.89
``` ```
Note that the cumulative loss keeps increasing within one epoch, but should Note that the cumulative loss keeps increasing within one epoch, but should
@ -559,6 +564,12 @@ start decreasing across epochs.
</Accordion> </Accordion>
| Name | Description |
| ---------------- | --------------------------------------------------------------------- |
| `progress_bar` | Whether the logger should print the progress bar. ~~bool~~ |
| `console_output` | Whether the logger should print the logs on the console. ~~bool~~ |
| `output_file` | The file to save the training logs to. ~~Optional[Union[str, Path]]~~ |
## Readers {#readers} ## Readers {#readers}
### File readers {#file-readers source="github.com/explosion/srsly" new="3"} ### File readers {#file-readers source="github.com/explosion/srsly" new="3"}