mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-23 15:54:13 +03:00
Add ConsoleLogger.v2 (#11214)
* Init * Change logger to ConsoleLogger.v2 * adjust naming * More naming adjustments * Fix output_file reference error * ignore type * Add basic test for logger * Hopefully fix mypy issue * mypy ignore line * Update mypy line Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com> * Update test method name Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com> * Change file saving logic * Fix finalize method * increase spacy-legacy version in requirements * Update docs * small adjustments Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
This commit is contained in:
parent
ba33200979
commit
6723d76f24
|
@ -1,5 +1,5 @@
|
||||||
# Our libraries
|
# Our libraries
|
||||||
spacy-legacy>=3.0.9,<3.1.0
|
spacy-legacy>=3.0.10,<3.1.0
|
||||||
spacy-loggers>=1.0.0,<2.0.0
|
spacy-loggers>=1.0.0,<2.0.0
|
||||||
cymem>=2.0.2,<2.1.0
|
cymem>=2.0.2,<2.1.0
|
||||||
preshed>=3.0.2,<3.1.0
|
preshed>=3.0.2,<3.1.0
|
||||||
|
|
|
@ -41,7 +41,7 @@ setup_requires =
|
||||||
thinc>=8.1.0,<8.2.0
|
thinc>=8.1.0,<8.2.0
|
||||||
install_requires =
|
install_requires =
|
||||||
# Our libraries
|
# Our libraries
|
||||||
spacy-legacy>=3.0.9,<3.1.0
|
spacy-legacy>=3.0.10,<3.1.0
|
||||||
spacy-loggers>=1.0.0,<2.0.0
|
spacy-loggers>=1.0.0,<2.0.0
|
||||||
murmurhash>=0.28.0,<1.1.0
|
murmurhash>=0.28.0,<1.1.0
|
||||||
cymem>=2.0.2,<2.1.0
|
cymem>=2.0.2,<2.1.0
|
||||||
|
|
30
spacy/tests/training/test_logger.py
Normal file
30
spacy/tests/training/test_logger.py
Normal file
|
@ -0,0 +1,30 @@
|
||||||
|
import pytest
|
||||||
|
import spacy
|
||||||
|
|
||||||
|
from spacy.training import loggers
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
|
||||||
|
def nlp():
|
||||||
|
nlp = spacy.blank("en")
|
||||||
|
nlp.add_pipe("ner")
|
||||||
|
return nlp
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
|
||||||
|
def info():
|
||||||
|
return {
|
||||||
|
"losses": {"ner": 100},
|
||||||
|
"other_scores": {"ENTS_F": 0.85, "ENTS_P": 0.90, "ENTS_R": 0.80},
|
||||||
|
"epoch": 100,
|
||||||
|
"step": 125,
|
||||||
|
"score": 85,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def test_console_logger(nlp, info):
|
||||||
|
console_logger = loggers.console_logger(
|
||||||
|
progress_bar=True, console_output=True, output_file=None
|
||||||
|
)
|
||||||
|
log_step, finalize = console_logger(nlp)
|
||||||
|
log_step(info)
|
|
@ -1,10 +1,13 @@
|
||||||
from typing import TYPE_CHECKING, Dict, Any, Tuple, Callable, List, Optional, IO
|
from typing import TYPE_CHECKING, Dict, Any, Tuple, Callable, List, Optional, IO, Union
|
||||||
from wasabi import Printer
|
from wasabi import Printer
|
||||||
|
from pathlib import Path
|
||||||
import tqdm
|
import tqdm
|
||||||
import sys
|
import sys
|
||||||
|
import srsly
|
||||||
|
|
||||||
from ..util import registry
|
from ..util import registry
|
||||||
from ..errors import Errors
|
from ..errors import Errors
|
||||||
|
from .. import util
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from ..language import Language # noqa: F401
|
from ..language import Language # noqa: F401
|
||||||
|
@ -23,13 +26,44 @@ def setup_table(
|
||||||
return final_cols, final_widths, ["r" for _ in final_widths]
|
return final_cols, final_widths, ["r" for _ in final_widths]
|
||||||
|
|
||||||
|
|
||||||
@registry.loggers("spacy.ConsoleLogger.v1")
|
@registry.loggers("spacy.ConsoleLogger.v2")
|
||||||
def console_logger(progress_bar: bool = False):
|
def console_logger(
|
||||||
|
progress_bar: bool = False,
|
||||||
|
console_output: bool = True,
|
||||||
|
output_file: Optional[Union[str, Path]] = None,
|
||||||
|
):
|
||||||
|
"""The ConsoleLogger.v2 prints out training logs in the console and/or saves them to a jsonl file.
|
||||||
|
progress_bar (bool): Whether the logger should print the progress bar.
|
||||||
|
console_output (bool): Whether the logger should print the logs on the console.
|
||||||
|
output_file (Optional[Union[str, Path]]): The file to save the training logs to.
|
||||||
|
"""
|
||||||
|
_log_exist = False
|
||||||
|
if output_file:
|
||||||
|
output_file = util.ensure_path(output_file) # type: ignore
|
||||||
|
if output_file.exists(): # type: ignore
|
||||||
|
_log_exist = True
|
||||||
|
if not output_file.parents[0].exists(): # type: ignore
|
||||||
|
output_file.parents[0].mkdir(parents=True) # type: ignore
|
||||||
|
|
||||||
def setup_printer(
|
def setup_printer(
|
||||||
nlp: "Language", stdout: IO = sys.stdout, stderr: IO = sys.stderr
|
nlp: "Language", stdout: IO = sys.stdout, stderr: IO = sys.stderr
|
||||||
) -> Tuple[Callable[[Optional[Dict[str, Any]]], None], Callable[[], None]]:
|
) -> Tuple[Callable[[Optional[Dict[str, Any]]], None], Callable[[], None]]:
|
||||||
write = lambda text: print(text, file=stdout, flush=True)
|
write = lambda text: print(text, file=stdout, flush=True)
|
||||||
msg = Printer(no_print=True)
|
msg = Printer(no_print=True)
|
||||||
|
|
||||||
|
nonlocal output_file
|
||||||
|
output_stream = None
|
||||||
|
if _log_exist:
|
||||||
|
write(
|
||||||
|
msg.warn(
|
||||||
|
f"Saving logs is disabled because {output_file} already exists."
|
||||||
|
)
|
||||||
|
)
|
||||||
|
output_file = None
|
||||||
|
elif output_file:
|
||||||
|
write(msg.info(f"Saving results to {output_file}"))
|
||||||
|
output_stream = open(output_file, "w", encoding="utf-8")
|
||||||
|
|
||||||
# ensure that only trainable components are logged
|
# ensure that only trainable components are logged
|
||||||
logged_pipes = [
|
logged_pipes = [
|
||||||
name
|
name
|
||||||
|
@ -40,6 +74,8 @@ def console_logger(progress_bar: bool = False):
|
||||||
score_weights = nlp.config["training"]["score_weights"]
|
score_weights = nlp.config["training"]["score_weights"]
|
||||||
score_cols = [col for col, value in score_weights.items() if value is not None]
|
score_cols = [col for col, value in score_weights.items() if value is not None]
|
||||||
loss_cols = [f"Loss {pipe}" for pipe in logged_pipes]
|
loss_cols = [f"Loss {pipe}" for pipe in logged_pipes]
|
||||||
|
|
||||||
|
if console_output:
|
||||||
spacing = 2
|
spacing = 2
|
||||||
table_header, table_widths, table_aligns = setup_table(
|
table_header, table_widths, table_aligns = setup_table(
|
||||||
cols=["E", "#"] + loss_cols + score_cols + ["Score"],
|
cols=["E", "#"] + loss_cols + score_cols + ["Score"],
|
||||||
|
@ -57,12 +93,15 @@ def console_logger(progress_bar: bool = False):
|
||||||
if progress is not None:
|
if progress is not None:
|
||||||
progress.update(1)
|
progress.update(1)
|
||||||
return
|
return
|
||||||
losses = [
|
|
||||||
"{0:.2f}".format(float(info["losses"][pipe_name]))
|
losses = []
|
||||||
for pipe_name in logged_pipes
|
log_losses = {}
|
||||||
]
|
for pipe_name in logged_pipes:
|
||||||
|
losses.append("{0:.2f}".format(float(info["losses"][pipe_name])))
|
||||||
|
log_losses[pipe_name] = float(info["losses"][pipe_name])
|
||||||
|
|
||||||
scores = []
|
scores = []
|
||||||
|
log_scores = {}
|
||||||
for col in score_cols:
|
for col in score_cols:
|
||||||
score = info["other_scores"].get(col, 0.0)
|
score = info["other_scores"].get(col, 0.0)
|
||||||
try:
|
try:
|
||||||
|
@ -73,6 +112,7 @@ def console_logger(progress_bar: bool = False):
|
||||||
if col != "speed":
|
if col != "speed":
|
||||||
score *= 100
|
score *= 100
|
||||||
scores.append("{0:.2f}".format(score))
|
scores.append("{0:.2f}".format(score))
|
||||||
|
log_scores[str(col)] = score
|
||||||
|
|
||||||
data = (
|
data = (
|
||||||
[info["epoch"], info["step"]]
|
[info["epoch"], info["step"]]
|
||||||
|
@ -80,10 +120,25 @@ def console_logger(progress_bar: bool = False):
|
||||||
+ scores
|
+ scores
|
||||||
+ ["{0:.2f}".format(float(info["score"]))]
|
+ ["{0:.2f}".format(float(info["score"]))]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if output_stream:
|
||||||
|
# Write to log file per log_step
|
||||||
|
log_data = {
|
||||||
|
"epoch": info["epoch"],
|
||||||
|
"step": info["step"],
|
||||||
|
"losses": log_losses,
|
||||||
|
"scores": log_scores,
|
||||||
|
"score": float(info["score"]),
|
||||||
|
}
|
||||||
|
output_stream.write(srsly.json_dumps(log_data) + "\n")
|
||||||
|
|
||||||
if progress is not None:
|
if progress is not None:
|
||||||
progress.close()
|
progress.close()
|
||||||
|
if console_output:
|
||||||
write(
|
write(
|
||||||
msg.row(data, widths=table_widths, aligns=table_aligns, spacing=spacing)
|
msg.row(
|
||||||
|
data, widths=table_widths, aligns=table_aligns, spacing=spacing
|
||||||
|
)
|
||||||
)
|
)
|
||||||
if progress_bar:
|
if progress_bar:
|
||||||
# Set disable=None, so that it disables on non-TTY
|
# Set disable=None, so that it disables on non-TTY
|
||||||
|
@ -93,7 +148,8 @@ def console_logger(progress_bar: bool = False):
|
||||||
progress.set_description(f"Epoch {info['epoch']+1}")
|
progress.set_description(f"Epoch {info['epoch']+1}")
|
||||||
|
|
||||||
def finalize() -> None:
|
def finalize() -> None:
|
||||||
pass
|
if output_stream:
|
||||||
|
output_stream.close()
|
||||||
|
|
||||||
return log_step, finalize
|
return log_step, finalize
|
||||||
|
|
||||||
|
|
|
@ -248,6 +248,59 @@ added to an existing vectors table. See more details in
|
||||||
|
|
||||||
## Loggers {#loggers}
|
## Loggers {#loggers}
|
||||||
|
|
||||||
|
These functions are available from `@spacy.registry.loggers`.
|
||||||
|
|
||||||
|
### spacy.ConsoleLogger.v1 {#ConsoleLogger_v1}
|
||||||
|
|
||||||
|
> #### Example config
|
||||||
|
>
|
||||||
|
> ```ini
|
||||||
|
> [training.logger]
|
||||||
|
> @loggers = "spacy.ConsoleLogger.v1"
|
||||||
|
> progress_bar = true
|
||||||
|
> ```
|
||||||
|
|
||||||
|
Writes the results of a training step to the console in a tabular format.
|
||||||
|
|
||||||
|
<Accordion title="Example console output" spaced>
|
||||||
|
|
||||||
|
```cli
|
||||||
|
$ python -m spacy train config.cfg
|
||||||
|
```
|
||||||
|
|
||||||
|
```
|
||||||
|
ℹ Using CPU
|
||||||
|
ℹ Loading config and nlp from: config.cfg
|
||||||
|
ℹ Pipeline: ['tok2vec', 'tagger']
|
||||||
|
ℹ Start training
|
||||||
|
ℹ Training. Initial learn rate: 0.0
|
||||||
|
|
||||||
|
E # LOSS TOK2VEC LOSS TAGGER TAG_ACC SCORE
|
||||||
|
--- ------ ------------ ----------- ------- ------
|
||||||
|
0 0 0.00 86.20 0.22 0.00
|
||||||
|
0 200 3.08 18968.78 34.00 0.34
|
||||||
|
0 400 31.81 22539.06 33.64 0.34
|
||||||
|
0 600 92.13 22794.91 43.80 0.44
|
||||||
|
0 800 183.62 21541.39 56.05 0.56
|
||||||
|
0 1000 352.49 25461.82 65.15 0.65
|
||||||
|
0 1200 422.87 23708.82 71.84 0.72
|
||||||
|
0 1400 601.92 24994.79 76.57 0.77
|
||||||
|
0 1600 662.57 22268.02 80.20 0.80
|
||||||
|
0 1800 1101.50 28413.77 82.56 0.83
|
||||||
|
0 2000 1253.43 28736.36 85.00 0.85
|
||||||
|
0 2200 1411.02 28237.53 87.42 0.87
|
||||||
|
0 2400 1605.35 28439.95 88.70 0.89
|
||||||
|
```
|
||||||
|
|
||||||
|
Note that the cumulative loss keeps increasing within one epoch, but should
|
||||||
|
start decreasing across epochs.
|
||||||
|
|
||||||
|
</Accordion>
|
||||||
|
|
||||||
|
| Name | Description |
|
||||||
|
| -------------- | --------------------------------------------------------- |
|
||||||
|
| `progress_bar` | Whether the logger should print the progress bar ~~bool~~ |
|
||||||
|
|
||||||
Logging utilities for spaCy are implemented in the
|
Logging utilities for spaCy are implemented in the
|
||||||
[`spacy-loggers`](https://github.com/explosion/spacy-loggers) repo, and the
|
[`spacy-loggers`](https://github.com/explosion/spacy-loggers) repo, and the
|
||||||
functions are typically available from `@spacy.registry.loggers`.
|
functions are typically available from `@spacy.registry.loggers`.
|
||||||
|
|
|
@ -275,8 +275,8 @@ Render a dependency parse tree or named entity visualization.
|
||||||
|
|
||||||
### displacy.parse_deps {#displacy.parse_deps tag="method" new="2"}
|
### displacy.parse_deps {#displacy.parse_deps tag="method" new="2"}
|
||||||
|
|
||||||
Generate dependency parse in `{'words': [], 'arcs': []}` format.
|
Generate dependency parse in `{'words': [], 'arcs': []}` format. For use with
|
||||||
For use with the `manual=True` argument in `displacy.render`.
|
the `manual=True` argument in `displacy.render`.
|
||||||
|
|
||||||
> #### Example
|
> #### Example
|
||||||
>
|
>
|
||||||
|
@ -297,8 +297,8 @@ For use with the `manual=True` argument in `displacy.render`.
|
||||||
|
|
||||||
### displacy.parse_ents {#displacy.parse_ents tag="method" new="2"}
|
### displacy.parse_ents {#displacy.parse_ents tag="method" new="2"}
|
||||||
|
|
||||||
Generate named entities in `[{start: i, end: i, label: 'label'}]` format.
|
Generate named entities in `[{start: i, end: i, label: 'label'}]` format. For
|
||||||
For use with the `manual=True` argument in `displacy.render`.
|
use with the `manual=True` argument in `displacy.render`.
|
||||||
|
|
||||||
> #### Example
|
> #### Example
|
||||||
>
|
>
|
||||||
|
@ -319,8 +319,8 @@ For use with the `manual=True` argument in `displacy.render`.
|
||||||
|
|
||||||
### displacy.parse_spans {#displacy.parse_spans tag="method" new="2"}
|
### displacy.parse_spans {#displacy.parse_spans tag="method" new="2"}
|
||||||
|
|
||||||
Generate spans in `[{start_token: i, end_token: i, label: 'label'}]` format.
|
Generate spans in `[{start_token: i, end_token: i, label: 'label'}]` format. For
|
||||||
For use with the `manual=True` argument in `displacy.render`.
|
use with the `manual=True` argument in `displacy.render`.
|
||||||
|
|
||||||
> #### Example
|
> #### Example
|
||||||
>
|
>
|
||||||
|
@ -505,7 +505,7 @@ finished. To log each training step, a
|
||||||
and the accuracy scores on the development set.
|
and the accuracy scores on the development set.
|
||||||
|
|
||||||
The built-in, default logger is the ConsoleLogger, which prints results to the
|
The built-in, default logger is the ConsoleLogger, which prints results to the
|
||||||
console in tabular format. The
|
console in tabular format and can optionally save them to a `jsonl` file. The
|
||||||
[spacy-loggers](https://github.com/explosion/spacy-loggers) package, included as
|
[spacy-loggers](https://github.com/explosion/spacy-loggers) package, included as
|
||||||
a dependency of spaCy, enables other loggers, such as one that sends results to
|
a dependency of spaCy, enables other loggers, such as one that sends results to
|
||||||
a [Weights & Biases](https://www.wandb.com/) dashboard.
|
a [Weights & Biases](https://www.wandb.com/) dashboard.
|
||||||
|
@ -513,16 +513,20 @@ a [Weights & Biases](https://www.wandb.com/) dashboard.
|
||||||
Instead of using one of the built-in loggers, you can
|
Instead of using one of the built-in loggers, you can
|
||||||
[implement your own](/usage/training#custom-logging).
|
[implement your own](/usage/training#custom-logging).
|
||||||
|
|
||||||
#### spacy.ConsoleLogger.v1 {#ConsoleLogger tag="registered function"}
|
#### spacy.ConsoleLogger.v2 {#ConsoleLogger tag="registered function"}
|
||||||
|
|
||||||
> #### Example config
|
> #### Example config
|
||||||
>
|
>
|
||||||
> ```ini
|
> ```ini
|
||||||
> [training.logger]
|
> [training.logger]
|
||||||
> @loggers = "spacy.ConsoleLogger.v1"
|
> @loggers = "spacy.ConsoleLogger.v2"
|
||||||
|
> progress_bar = true
|
||||||
|
> console_output = true
|
||||||
|
> output_file = "training_log.jsonl"
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
Writes the results of a training step to the console in a tabular format.
|
Writes the results of a training step to the console in a tabular format and
|
||||||
|
optionally saves them to a `jsonl` file.
|
||||||
|
|
||||||
<Accordion title="Example console output" spaced>
|
<Accordion title="Example console output" spaced>
|
||||||
|
|
||||||
|
@ -536,22 +540,23 @@ $ python -m spacy train config.cfg
|
||||||
ℹ Pipeline: ['tok2vec', 'tagger']
|
ℹ Pipeline: ['tok2vec', 'tagger']
|
||||||
ℹ Start training
|
ℹ Start training
|
||||||
ℹ Training. Initial learn rate: 0.0
|
ℹ Training. Initial learn rate: 0.0
|
||||||
|
ℹ Saving results to training_log.jsonl
|
||||||
|
|
||||||
E # LOSS TOK2VEC LOSS TAGGER TAG_ACC SCORE
|
E # LOSS TOK2VEC LOSS TAGGER TAG_ACC SCORE
|
||||||
--- ------ ------------ ----------- ------- ------
|
--- ------ ------------ ----------- ------- ------
|
||||||
1 0 0.00 86.20 0.22 0.00
|
0 0 0.00 86.20 0.22 0.00
|
||||||
1 200 3.08 18968.78 34.00 0.34
|
0 200 3.08 18968.78 34.00 0.34
|
||||||
1 400 31.81 22539.06 33.64 0.34
|
0 400 31.81 22539.06 33.64 0.34
|
||||||
1 600 92.13 22794.91 43.80 0.44
|
0 600 92.13 22794.91 43.80 0.44
|
||||||
1 800 183.62 21541.39 56.05 0.56
|
0 800 183.62 21541.39 56.05 0.56
|
||||||
1 1000 352.49 25461.82 65.15 0.65
|
0 1000 352.49 25461.82 65.15 0.65
|
||||||
1 1200 422.87 23708.82 71.84 0.72
|
0 1200 422.87 23708.82 71.84 0.72
|
||||||
1 1400 601.92 24994.79 76.57 0.77
|
0 1400 601.92 24994.79 76.57 0.77
|
||||||
1 1600 662.57 22268.02 80.20 0.80
|
0 1600 662.57 22268.02 80.20 0.80
|
||||||
1 1800 1101.50 28413.77 82.56 0.83
|
0 1800 1101.50 28413.77 82.56 0.83
|
||||||
1 2000 1253.43 28736.36 85.00 0.85
|
0 2000 1253.43 28736.36 85.00 0.85
|
||||||
1 2200 1411.02 28237.53 87.42 0.87
|
0 2200 1411.02 28237.53 87.42 0.87
|
||||||
1 2400 1605.35 28439.95 88.70 0.89
|
0 2400 1605.35 28439.95 88.70 0.89
|
||||||
```
|
```
|
||||||
|
|
||||||
Note that the cumulative loss keeps increasing within one epoch, but should
|
Note that the cumulative loss keeps increasing within one epoch, but should
|
||||||
|
@ -559,6 +564,12 @@ start decreasing across epochs.
|
||||||
|
|
||||||
</Accordion>
|
</Accordion>
|
||||||
|
|
||||||
|
| Name | Description |
|
||||||
|
| ---------------- | --------------------------------------------------------------------- |
|
||||||
|
| `progress_bar` | Whether the logger should print the progress bar. ~~bool~~ |
|
||||||
|
| `console_output` | Whether the logger should print the logs to the console. ~~bool~~ |
|
||||||
|
| `output_file` | The file to save the training logs to. If the file already exists, saving is disabled and a warning is printed. ~~Optional[Union[str, Path]]~~ |
|
||||||
|
|
||||||
## Readers {#readers}
|
## Readers {#readers}
|
||||||
|
|
||||||
### File readers {#file-readers source="github.com/explosion/srsly" new="3"}
|
### File readers {#file-readers source="github.com/explosion/srsly" new="3"}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user