mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-10 19:57:17 +03:00
Add ConsoleLogger.v2 (#11214)
* Init * Change logger to ConsoleLogger.v2 * adjust naming * More naming adjustments * Fix output_file reference error * ignore type * Add basic test for logger * Hopefully fix mypy issue * mypy ignore line * Update mypy line Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com> * Update test method name Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com> * Change file saving logic * Fix finalize method * increase spacy-legacy version in requirements * Update docs * small adjustments Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
This commit is contained in:
parent
ba33200979
commit
6723d76f24
|
@ -1,5 +1,5 @@
|
|||
# Our libraries
|
||||
spacy-legacy>=3.0.9,<3.1.0
|
||||
spacy-legacy>=3.0.10,<3.1.0
|
||||
spacy-loggers>=1.0.0,<2.0.0
|
||||
cymem>=2.0.2,<2.1.0
|
||||
preshed>=3.0.2,<3.1.0
|
||||
|
|
|
@ -41,7 +41,7 @@ setup_requires =
|
|||
thinc>=8.1.0,<8.2.0
|
||||
install_requires =
|
||||
# Our libraries
|
||||
spacy-legacy>=3.0.9,<3.1.0
|
||||
spacy-legacy>=3.0.10,<3.1.0
|
||||
spacy-loggers>=1.0.0,<2.0.0
|
||||
murmurhash>=0.28.0,<1.1.0
|
||||
cymem>=2.0.2,<2.1.0
|
||||
|
|
30
spacy/tests/training/test_logger.py
Normal file
30
spacy/tests/training/test_logger.py
Normal file
|
@ -0,0 +1,30 @@
|
|||
import pytest
|
||||
import spacy
|
||||
|
||||
from spacy.training import loggers
|
||||
|
||||
|
||||
@pytest.fixture()
def nlp():
    """Provide a blank English pipeline with a single `ner` component added."""
    pipeline = spacy.blank("en")
    pipeline.add_pipe("ner")
    return pipeline
|
||||
|
||||
|
||||
@pytest.fixture()
def info():
    """Provide a sample training-step record to pass to the logger's `log_step`.

    The record carries the per-component losses, the individual evaluation
    scores, the epoch and step counters, and the overall score.
    """
    component_losses = {"ner": 100}
    eval_scores = {"ENTS_F": 0.85, "ENTS_P": 0.90, "ENTS_R": 0.80}
    step_record = {
        "losses": component_losses,
        "other_scores": eval_scores,
        "epoch": 100,
        "step": 125,
        "score": 85,
    }
    return step_record
|
||||
|
||||
|
||||
def test_console_logger(nlp, info):
    """Smoke-test the console logger: set it up, log one step, then finalize.

    The logger is created with the progress bar and console output enabled and
    without an output file, so nothing is written to disk. The test passes if
    neither `log_step` nor `finalize` raises.

    nlp: Fixture providing the pipeline the logger is set up for.
    info: Fixture providing the training-step record that is logged.
    """
    setup_logger = loggers.console_logger(
        progress_bar=True, console_output=True, output_file=None
    )
    log_step, finalize = setup_logger(nlp)
    try:
        log_step(info)
    finally:
        # finalize() is the logger's cleanup hook (it closes the output stream
        # when one is open). Call it even if log_step fails so the hook is
        # always exercised and nothing is left open.
        finalize()
|
|
@ -1,10 +1,13 @@
|
|||
from typing import TYPE_CHECKING, Dict, Any, Tuple, Callable, List, Optional, IO
|
||||
from typing import TYPE_CHECKING, Dict, Any, Tuple, Callable, List, Optional, IO, Union
|
||||
from wasabi import Printer
|
||||
from pathlib import Path
|
||||
import tqdm
|
||||
import sys
|
||||
import srsly
|
||||
|
||||
from ..util import registry
|
||||
from ..errors import Errors
|
||||
from .. import util
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from ..language import Language # noqa: F401
|
||||
|
@ -23,13 +26,44 @@ def setup_table(
|
|||
return final_cols, final_widths, ["r" for _ in final_widths]
|
||||
|
||||
|
||||
@registry.loggers("spacy.ConsoleLogger.v1")
|
||||
def console_logger(progress_bar: bool = False):
|
||||
@registry.loggers("spacy.ConsoleLogger.v2")
|
||||
def console_logger(
|
||||
progress_bar: bool = False,
|
||||
console_output: bool = True,
|
||||
output_file: Optional[Union[str, Path]] = None,
|
||||
):
|
||||
"""The ConsoleLogger.v2 prints out training logs in the console and/or saves them to a jsonl file.
|
||||
progress_bar (bool): Whether the logger should print the progress bar.
|
||||
console_output (bool): Whether the logger should print the logs on the console.
|
||||
output_file (Optional[Union[str, Path]]): The file to save the training logs to.
|
||||
"""
|
||||
_log_exist = False
|
||||
if output_file:
|
||||
output_file = util.ensure_path(output_file) # type: ignore
|
||||
if output_file.exists(): # type: ignore
|
||||
_log_exist = True
|
||||
if not output_file.parents[0].exists(): # type: ignore
|
||||
output_file.parents[0].mkdir(parents=True) # type: ignore
|
||||
|
||||
def setup_printer(
|
||||
nlp: "Language", stdout: IO = sys.stdout, stderr: IO = sys.stderr
|
||||
) -> Tuple[Callable[[Optional[Dict[str, Any]]], None], Callable[[], None]]:
|
||||
write = lambda text: print(text, file=stdout, flush=True)
|
||||
msg = Printer(no_print=True)
|
||||
|
||||
nonlocal output_file
|
||||
output_stream = None
|
||||
if _log_exist:
|
||||
write(
|
||||
msg.warn(
|
||||
f"Saving logs is disabled because {output_file} already exists."
|
||||
)
|
||||
)
|
||||
output_file = None
|
||||
elif output_file:
|
||||
write(msg.info(f"Saving results to {output_file}"))
|
||||
output_stream = open(output_file, "w", encoding="utf-8")
|
||||
|
||||
# ensure that only trainable components are logged
|
||||
logged_pipes = [
|
||||
name
|
||||
|
@ -40,13 +74,15 @@ def console_logger(progress_bar: bool = False):
|
|||
score_weights = nlp.config["training"]["score_weights"]
|
||||
score_cols = [col for col, value in score_weights.items() if value is not None]
|
||||
loss_cols = [f"Loss {pipe}" for pipe in logged_pipes]
|
||||
spacing = 2
|
||||
table_header, table_widths, table_aligns = setup_table(
|
||||
cols=["E", "#"] + loss_cols + score_cols + ["Score"],
|
||||
widths=[3, 6] + [8 for _ in loss_cols] + [6 for _ in score_cols] + [6],
|
||||
)
|
||||
write(msg.row(table_header, widths=table_widths, spacing=spacing))
|
||||
write(msg.row(["-" * width for width in table_widths], spacing=spacing))
|
||||
|
||||
if console_output:
|
||||
spacing = 2
|
||||
table_header, table_widths, table_aligns = setup_table(
|
||||
cols=["E", "#"] + loss_cols + score_cols + ["Score"],
|
||||
widths=[3, 6] + [8 for _ in loss_cols] + [6 for _ in score_cols] + [6],
|
||||
)
|
||||
write(msg.row(table_header, widths=table_widths, spacing=spacing))
|
||||
write(msg.row(["-" * width for width in table_widths], spacing=spacing))
|
||||
progress = None
|
||||
|
||||
def log_step(info: Optional[Dict[str, Any]]) -> None:
|
||||
|
@ -57,12 +93,15 @@ def console_logger(progress_bar: bool = False):
|
|||
if progress is not None:
|
||||
progress.update(1)
|
||||
return
|
||||
losses = [
|
||||
"{0:.2f}".format(float(info["losses"][pipe_name]))
|
||||
for pipe_name in logged_pipes
|
||||
]
|
||||
|
||||
losses = []
|
||||
log_losses = {}
|
||||
for pipe_name in logged_pipes:
|
||||
losses.append("{0:.2f}".format(float(info["losses"][pipe_name])))
|
||||
log_losses[pipe_name] = float(info["losses"][pipe_name])
|
||||
|
||||
scores = []
|
||||
log_scores = {}
|
||||
for col in score_cols:
|
||||
score = info["other_scores"].get(col, 0.0)
|
||||
try:
|
||||
|
@ -73,6 +112,7 @@ def console_logger(progress_bar: bool = False):
|
|||
if col != "speed":
|
||||
score *= 100
|
||||
scores.append("{0:.2f}".format(score))
|
||||
log_scores[str(col)] = score
|
||||
|
||||
data = (
|
||||
[info["epoch"], info["step"]]
|
||||
|
@ -80,20 +120,36 @@ def console_logger(progress_bar: bool = False):
|
|||
+ scores
|
||||
+ ["{0:.2f}".format(float(info["score"]))]
|
||||
)
|
||||
|
||||
if output_stream:
|
||||
# Write to log file per log_step
|
||||
log_data = {
|
||||
"epoch": info["epoch"],
|
||||
"step": info["step"],
|
||||
"losses": log_losses,
|
||||
"scores": log_scores,
|
||||
"score": float(info["score"]),
|
||||
}
|
||||
output_stream.write(srsly.json_dumps(log_data) + "\n")
|
||||
|
||||
if progress is not None:
|
||||
progress.close()
|
||||
write(
|
||||
msg.row(data, widths=table_widths, aligns=table_aligns, spacing=spacing)
|
||||
)
|
||||
if progress_bar:
|
||||
# Set disable=None, so that it disables on non-TTY
|
||||
progress = tqdm.tqdm(
|
||||
total=eval_frequency, disable=None, leave=False, file=stderr
|
||||
if console_output:
|
||||
write(
|
||||
msg.row(
|
||||
data, widths=table_widths, aligns=table_aligns, spacing=spacing
|
||||
)
|
||||
)
|
||||
progress.set_description(f"Epoch {info['epoch']+1}")
|
||||
if progress_bar:
|
||||
# Set disable=None, so that it disables on non-TTY
|
||||
progress = tqdm.tqdm(
|
||||
total=eval_frequency, disable=None, leave=False, file=stderr
|
||||
)
|
||||
progress.set_description(f"Epoch {info['epoch']+1}")
|
||||
|
||||
def finalize() -> None:
|
||||
pass
|
||||
if output_stream:
|
||||
output_stream.close()
|
||||
|
||||
return log_step, finalize
|
||||
|
||||
|
|
|
@ -248,6 +248,59 @@ added to an existing vectors table. See more details in
|
|||
|
||||
## Loggers {#loggers}
|
||||
|
||||
These functions are available from `@spacy.registry.loggers`.
|
||||
|
||||
### spacy.ConsoleLogger.v1 {#ConsoleLogger_v1}
|
||||
|
||||
> #### Example config
|
||||
>
|
||||
> ```ini
|
||||
> [training.logger]
|
||||
> @loggers = "spacy.ConsoleLogger.v1"
|
||||
> progress_bar = true
|
||||
> ```
|
||||
|
||||
Writes the results of a training step to the console in a tabular format.
|
||||
|
||||
<Accordion title="Example console output" spaced>
|
||||
|
||||
```cli
|
||||
$ python -m spacy train config.cfg
|
||||
```
|
||||
|
||||
```
|
||||
ℹ Using CPU
|
||||
ℹ Loading config and nlp from: config.cfg
|
||||
ℹ Pipeline: ['tok2vec', 'tagger']
|
||||
ℹ Start training
|
||||
ℹ Training. Initial learn rate: 0.0
|
||||
|
||||
E # LOSS TOK2VEC LOSS TAGGER TAG_ACC SCORE
|
||||
--- ------ ------------ ----------- ------- ------
|
||||
0 0 0.00 86.20 0.22 0.00
|
||||
0 200 3.08 18968.78 34.00 0.34
|
||||
0 400 31.81 22539.06 33.64 0.34
|
||||
0 600 92.13 22794.91 43.80 0.44
|
||||
0 800 183.62 21541.39 56.05 0.56
|
||||
0 1000 352.49 25461.82 65.15 0.65
|
||||
0 1200 422.87 23708.82 71.84 0.72
|
||||
0 1400 601.92 24994.79 76.57 0.77
|
||||
0 1600 662.57 22268.02 80.20 0.80
|
||||
0 1800 1101.50 28413.77 82.56 0.83
|
||||
0 2000 1253.43 28736.36 85.00 0.85
|
||||
0 2200 1411.02 28237.53 87.42 0.87
|
||||
0 2400 1605.35 28439.95 88.70 0.89
|
||||
```
|
||||
|
||||
Note that the cumulative loss keeps increasing within one epoch, but should
|
||||
start decreasing across epochs.
|
||||
|
||||
</Accordion>
|
||||
|
||||
| Name | Description |
|
||||
| -------------- | --------------------------------------------------------- |
|
||||
| `progress_bar` | Whether the logger should print the progress bar ~~bool~~ |
|
||||
|
||||
Logging utilities for spaCy are implemented in the
|
||||
[`spacy-loggers`](https://github.com/explosion/spacy-loggers) repo, and the
|
||||
functions are typically available from `@spacy.registry.loggers`.
|
||||
|
|
|
@ -275,8 +275,8 @@ Render a dependency parse tree or named entity visualization.
|
|||
|
||||
### displacy.parse_deps {#displacy.parse_deps tag="method" new="2"}
|
||||
|
||||
Generate dependency parse in `{'words': [], 'arcs': []}` format.
|
||||
For use with the `manual=True` argument in `displacy.render`.
|
||||
Generate dependency parse in `{'words': [], 'arcs': []}` format. For use with
|
||||
the `manual=True` argument in `displacy.render`.
|
||||
|
||||
> #### Example
|
||||
>
|
||||
|
@ -297,8 +297,8 @@ For use with the `manual=True` argument in `displacy.render`.
|
|||
|
||||
### displacy.parse_ents {#displacy.parse_ents tag="method" new="2"}
|
||||
|
||||
Generate named entities in `[{start: i, end: i, label: 'label'}]` format.
|
||||
For use with the `manual=True` argument in `displacy.render`.
|
||||
Generate named entities in `[{start: i, end: i, label: 'label'}]` format. For
|
||||
use with the `manual=True` argument in `displacy.render`.
|
||||
|
||||
> #### Example
|
||||
>
|
||||
|
@ -319,8 +319,8 @@ For use with the `manual=True` argument in `displacy.render`.
|
|||
|
||||
### displacy.parse_spans {#displacy.parse_spans tag="method" new="2"}
|
||||
|
||||
Generate spans in `[{start_token: i, end_token: i, label: 'label'}]` format.
|
||||
For use with the `manual=True` argument in `displacy.render`.
|
||||
Generate spans in `[{start_token: i, end_token: i, label: 'label'}]` format. For
|
||||
use with the `manual=True` argument in `displacy.render`.
|
||||
|
||||
> #### Example
|
||||
>
|
||||
|
@ -505,7 +505,7 @@ finished. To log each training step, a
|
|||
and the accuracy scores on the development set.
|
||||
|
||||
The built-in, default logger is the ConsoleLogger, which prints results to the
|
||||
console in tabular format. The
|
||||
console in tabular format and can optionally save them to a `jsonl` file. The
|
||||
[spacy-loggers](https://github.com/explosion/spacy-loggers) package, included as
|
||||
a dependency of spaCy, enables other loggers, such as one that sends results to
|
||||
a [Weights & Biases](https://www.wandb.com/) dashboard.
|
||||
|
@ -513,16 +513,20 @@ a [Weights & Biases](https://www.wandb.com/) dashboard.
|
|||
Instead of using one of the built-in loggers, you can
|
||||
[implement your own](/usage/training#custom-logging).
|
||||
|
||||
#### spacy.ConsoleLogger.v1 {#ConsoleLogger tag="registered function"}
|
||||
#### spacy.ConsoleLogger.v2 {#ConsoleLogger tag="registered function"}
|
||||
|
||||
> #### Example config
|
||||
>
|
||||
> ```ini
|
||||
> [training.logger]
|
||||
> @loggers = "spacy.ConsoleLogger.v1"
|
||||
> @loggers = "spacy.ConsoleLogger.v2"
|
||||
> progress_bar = true
|
||||
> console_output = true
|
||||
> output_file = "training_log.jsonl"
|
||||
> ```
|
||||
|
||||
Writes the results of a training step to the console in a tabular format.
|
||||
Writes the results of a training step to the console in a tabular format and
|
||||
optionally saves them to a `jsonl` file when `output_file` is set.
|
||||
|
||||
<Accordion title="Example console output" spaced>
|
||||
|
||||
|
@ -536,22 +540,23 @@ $ python -m spacy train config.cfg
|
|||
ℹ Pipeline: ['tok2vec', 'tagger']
|
||||
ℹ Start training
|
||||
ℹ Training. Initial learn rate: 0.0
|
||||
ℹ Saving results to training_log.jsonl
|
||||
|
||||
E # LOSS TOK2VEC LOSS TAGGER TAG_ACC SCORE
|
||||
--- ------ ------------ ----------- ------- ------
|
||||
1 0 0.00 86.20 0.22 0.00
|
||||
1 200 3.08 18968.78 34.00 0.34
|
||||
1 400 31.81 22539.06 33.64 0.34
|
||||
1 600 92.13 22794.91 43.80 0.44
|
||||
1 800 183.62 21541.39 56.05 0.56
|
||||
1 1000 352.49 25461.82 65.15 0.65
|
||||
1 1200 422.87 23708.82 71.84 0.72
|
||||
1 1400 601.92 24994.79 76.57 0.77
|
||||
1 1600 662.57 22268.02 80.20 0.80
|
||||
1 1800 1101.50 28413.77 82.56 0.83
|
||||
1 2000 1253.43 28736.36 85.00 0.85
|
||||
1 2200 1411.02 28237.53 87.42 0.87
|
||||
1 2400 1605.35 28439.95 88.70 0.89
|
||||
0 0 0.00 86.20 0.22 0.00
|
||||
0 200 3.08 18968.78 34.00 0.34
|
||||
0 400 31.81 22539.06 33.64 0.34
|
||||
0 600 92.13 22794.91 43.80 0.44
|
||||
0 800 183.62 21541.39 56.05 0.56
|
||||
0 1000 352.49 25461.82 65.15 0.65
|
||||
0 1200 422.87 23708.82 71.84 0.72
|
||||
0 1400 601.92 24994.79 76.57 0.77
|
||||
0 1600 662.57 22268.02 80.20 0.80
|
||||
0 1800 1101.50 28413.77 82.56 0.83
|
||||
0 2000 1253.43 28736.36 85.00 0.85
|
||||
0 2200 1411.02 28237.53 87.42 0.87
|
||||
0 2400 1605.35 28439.95 88.70 0.89
|
||||
```
|
||||
|
||||
Note that the cumulative loss keeps increasing within one epoch, but should
|
||||
|
@ -559,6 +564,12 @@ start decreasing across epochs.
|
|||
|
||||
</Accordion>
|
||||
|
||||
| Name | Description |
|
||||
| ---------------- | --------------------------------------------------------------------- |
|
||||
| `progress_bar` | Whether the logger should print the progress bar. ~~bool~~ |
|
||||
| `console_output` | Whether the logger should print the logs on the console. ~~bool~~ |
|
||||
| `output_file` | The file to save the training logs to. ~~Optional[Union[str, Path]]~~ |
|
||||
|
||||
## Readers {#readers}
|
||||
|
||||
### File readers {#file-readers source="github.com/explosion/srsly" new="3"}
|
||||
|
|
Loading…
Reference in New Issue
Block a user