mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-25 13:11:03 +03:00 
			
		
		
		
	Add ConsoleLogger.v2 (#11214)
* Init * Change logger to ConsoleLogger.v2 * adjust naming * More naming adjustments * Fix output_file reference error * ignore type * Add basic test for logger * Hopefully fix mypy issue * mypy ignore line * Update mypy line Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com> * Update test method name Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com> * Change file saving logic * Fix finalize method * increase spacy-legacy version in requirements * Update docs * small adjustments Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
This commit is contained in:
		
							parent
							
								
									ba33200979
								
							
						
					
					
						commit
						6723d76f24
					
				|  | @ -1,5 +1,5 @@ | |||
| # Our libraries | ||||
| spacy-legacy>=3.0.9,<3.1.0 | ||||
| spacy-legacy>=3.0.10,<3.1.0 | ||||
| spacy-loggers>=1.0.0,<2.0.0 | ||||
| cymem>=2.0.2,<2.1.0 | ||||
| preshed>=3.0.2,<3.1.0 | ||||
|  |  | |||
|  | @ -41,7 +41,7 @@ setup_requires = | |||
|     thinc>=8.1.0,<8.2.0 | ||||
| install_requires = | ||||
|     # Our libraries | ||||
|     spacy-legacy>=3.0.9,<3.1.0 | ||||
|     spacy-legacy>=3.0.10,<3.1.0 | ||||
|     spacy-loggers>=1.0.0,<2.0.0 | ||||
|     murmurhash>=0.28.0,<1.1.0 | ||||
|     cymem>=2.0.2,<2.1.0 | ||||
|  |  | |||
							
								
								
									
										30
									
								
								spacy/tests/training/test_logger.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										30
									
								
								spacy/tests/training/test_logger.py
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,30 @@ | |||
| import pytest | ||||
| import spacy | ||||
| 
 | ||||
| from spacy.training import loggers | ||||
| 
 | ||||
| 
 | ||||
| @pytest.fixture() | ||||
| def nlp(): | ||||
|     nlp = spacy.blank("en") | ||||
|     nlp.add_pipe("ner") | ||||
|     return nlp | ||||
| 
 | ||||
| 
 | ||||
| @pytest.fixture() | ||||
| def info(): | ||||
|     return { | ||||
|         "losses": {"ner": 100}, | ||||
|         "other_scores": {"ENTS_F": 0.85, "ENTS_P": 0.90, "ENTS_R": 0.80}, | ||||
|         "epoch": 100, | ||||
|         "step": 125, | ||||
|         "score": 85, | ||||
|     } | ||||
| 
 | ||||
| 
 | ||||
| def test_console_logger(nlp, info): | ||||
|     console_logger = loggers.console_logger( | ||||
|         progress_bar=True, console_output=True, output_file=None | ||||
|     ) | ||||
|     log_step, finalize = console_logger(nlp) | ||||
|     log_step(info) | ||||
|  | @ -1,10 +1,13 @@ | |||
| from typing import TYPE_CHECKING, Dict, Any, Tuple, Callable, List, Optional, IO | ||||
| from typing import TYPE_CHECKING, Dict, Any, Tuple, Callable, List, Optional, IO, Union | ||||
| from wasabi import Printer | ||||
| from pathlib import Path | ||||
| import tqdm | ||||
| import sys | ||||
| import srsly | ||||
| 
 | ||||
| from ..util import registry | ||||
| from ..errors import Errors | ||||
| from .. import util | ||||
| 
 | ||||
| if TYPE_CHECKING: | ||||
|     from ..language import Language  # noqa: F401 | ||||
|  | @ -23,13 +26,44 @@ def setup_table( | |||
|     return final_cols, final_widths, ["r" for _ in final_widths] | ||||
| 
 | ||||
| 
 | ||||
| @registry.loggers("spacy.ConsoleLogger.v1") | ||||
| def console_logger(progress_bar: bool = False): | ||||
| @registry.loggers("spacy.ConsoleLogger.v2") | ||||
| def console_logger( | ||||
|     progress_bar: bool = False, | ||||
|     console_output: bool = True, | ||||
|     output_file: Optional[Union[str, Path]] = None, | ||||
| ): | ||||
|     """The ConsoleLogger.v2 prints out training logs in the console and/or saves them to a jsonl file. | ||||
|     progress_bar (bool): Whether the logger should print the progress bar. | ||||
|     console_output (bool): Whether the logger should print the logs on the console. | ||||
|     output_file (Optional[Union[str, Path]]): The file to save the training logs to. | ||||
|     """ | ||||
|     _log_exist = False | ||||
|     if output_file: | ||||
|         output_file = util.ensure_path(output_file)  # type: ignore | ||||
|         if output_file.exists():  # type: ignore | ||||
|             _log_exist = True | ||||
|         if not output_file.parents[0].exists():  # type: ignore | ||||
|             output_file.parents[0].mkdir(parents=True)  # type: ignore | ||||
| 
 | ||||
|     def setup_printer( | ||||
|         nlp: "Language", stdout: IO = sys.stdout, stderr: IO = sys.stderr | ||||
|     ) -> Tuple[Callable[[Optional[Dict[str, Any]]], None], Callable[[], None]]: | ||||
|         write = lambda text: print(text, file=stdout, flush=True) | ||||
|         msg = Printer(no_print=True) | ||||
| 
 | ||||
|         nonlocal output_file | ||||
|         output_stream = None | ||||
|         if _log_exist: | ||||
|             write( | ||||
|                 msg.warn( | ||||
|                     f"Saving logs is disabled because {output_file} already exists." | ||||
|                 ) | ||||
|             ) | ||||
|             output_file = None | ||||
|         elif output_file: | ||||
|             write(msg.info(f"Saving results to {output_file}")) | ||||
|             output_stream = open(output_file, "w", encoding="utf-8") | ||||
| 
 | ||||
|         # ensure that only trainable components are logged | ||||
|         logged_pipes = [ | ||||
|             name | ||||
|  | @ -40,6 +74,8 @@ def console_logger(progress_bar: bool = False): | |||
|         score_weights = nlp.config["training"]["score_weights"] | ||||
|         score_cols = [col for col, value in score_weights.items() if value is not None] | ||||
|         loss_cols = [f"Loss {pipe}" for pipe in logged_pipes] | ||||
| 
 | ||||
|         if console_output: | ||||
|             spacing = 2 | ||||
|             table_header, table_widths, table_aligns = setup_table( | ||||
|                 cols=["E", "#"] + loss_cols + score_cols + ["Score"], | ||||
|  | @ -57,12 +93,15 @@ def console_logger(progress_bar: bool = False): | |||
|                 if progress is not None: | ||||
|                     progress.update(1) | ||||
|                 return | ||||
|             losses = [ | ||||
|                 "{0:.2f}".format(float(info["losses"][pipe_name])) | ||||
|                 for pipe_name in logged_pipes | ||||
|             ] | ||||
| 
 | ||||
|             losses = [] | ||||
|             log_losses = {} | ||||
|             for pipe_name in logged_pipes: | ||||
|                 losses.append("{0:.2f}".format(float(info["losses"][pipe_name]))) | ||||
|                 log_losses[pipe_name] = float(info["losses"][pipe_name]) | ||||
| 
 | ||||
|             scores = [] | ||||
|             log_scores = {} | ||||
|             for col in score_cols: | ||||
|                 score = info["other_scores"].get(col, 0.0) | ||||
|                 try: | ||||
|  | @ -73,6 +112,7 @@ def console_logger(progress_bar: bool = False): | |||
|                 if col != "speed": | ||||
|                     score *= 100 | ||||
|                 scores.append("{0:.2f}".format(score)) | ||||
|                 log_scores[str(col)] = score | ||||
| 
 | ||||
|             data = ( | ||||
|                 [info["epoch"], info["step"]] | ||||
|  | @ -80,10 +120,25 @@ def console_logger(progress_bar: bool = False): | |||
|                 + scores | ||||
|                 + ["{0:.2f}".format(float(info["score"]))] | ||||
|             ) | ||||
| 
 | ||||
|             if output_stream: | ||||
|                 # Write to log file per log_step | ||||
|                 log_data = { | ||||
|                     "epoch": info["epoch"], | ||||
|                     "step": info["step"], | ||||
|                     "losses": log_losses, | ||||
|                     "scores": log_scores, | ||||
|                     "score": float(info["score"]), | ||||
|                 } | ||||
|                 output_stream.write(srsly.json_dumps(log_data) + "\n") | ||||
| 
 | ||||
|             if progress is not None: | ||||
|                 progress.close() | ||||
|             if console_output: | ||||
|                 write( | ||||
|                 msg.row(data, widths=table_widths, aligns=table_aligns, spacing=spacing) | ||||
|                     msg.row( | ||||
|                         data, widths=table_widths, aligns=table_aligns, spacing=spacing | ||||
|                     ) | ||||
|                 ) | ||||
|                 if progress_bar: | ||||
|                     # Set disable=None, so that it disables on non-TTY | ||||
|  | @ -93,7 +148,8 @@ def console_logger(progress_bar: bool = False): | |||
|                     progress.set_description(f"Epoch {info['epoch']+1}") | ||||
| 
 | ||||
|         def finalize() -> None: | ||||
|             pass | ||||
|             if output_stream: | ||||
|                 output_stream.close() | ||||
| 
 | ||||
|         return log_step, finalize | ||||
| 
 | ||||
|  |  | |||
|  | @ -248,6 +248,59 @@ added to an existing vectors table. See more details in | |||
| 
 | ||||
| ## Loggers {#loggers} | ||||
| 
 | ||||
| These functions are available from `@spacy.registry.loggers`. | ||||
| 
 | ||||
| ### spacy.ConsoleLogger.v1 {#ConsoleLogger_v1} | ||||
| 
 | ||||
| > #### Example config | ||||
| > | ||||
| > ```ini | ||||
| > [training.logger] | ||||
| > @loggers = "spacy.ConsoleLogger.v1" | ||||
| > progress_bar = true | ||||
| > ``` | ||||
| 
 | ||||
| Writes the results of a training step to the console in a tabular format. | ||||
| 
 | ||||
| <Accordion title="Example console output" spaced> | ||||
| 
 | ||||
| ```cli | ||||
| $ python -m spacy train config.cfg | ||||
| ``` | ||||
| 
 | ||||
| ``` | ||||
| ℹ Using CPU | ||||
| ℹ Loading config and nlp from: config.cfg | ||||
| ℹ Pipeline: ['tok2vec', 'tagger'] | ||||
| ℹ Start training | ||||
| ℹ Training. Initial learn rate: 0.0 | ||||
| 
 | ||||
| E     #        LOSS TOK2VEC   LOSS TAGGER   TAG_ACC   SCORE | ||||
| ---   ------   ------------   -----------   -------   ------ | ||||
|   0        0           0.00         86.20      0.22     0.00 | ||||
|   0      200           3.08      18968.78     34.00     0.34 | ||||
|   0      400          31.81      22539.06     33.64     0.34 | ||||
|   0      600          92.13      22794.91     43.80     0.44 | ||||
|   0      800         183.62      21541.39     56.05     0.56 | ||||
|   0     1000         352.49      25461.82     65.15     0.65 | ||||
|   0     1200         422.87      23708.82     71.84     0.72 | ||||
|   0     1400         601.92      24994.79     76.57     0.77 | ||||
|   0     1600         662.57      22268.02     80.20     0.80 | ||||
|   0     1800        1101.50      28413.77     82.56     0.83 | ||||
|   0     2000        1253.43      28736.36     85.00     0.85 | ||||
|   0     2200        1411.02      28237.53     87.42     0.87 | ||||
|   0     2400        1605.35      28439.95     88.70     0.89 | ||||
| ``` | ||||
| 
 | ||||
| Note that the cumulative loss keeps increasing within one epoch, but should | ||||
| start decreasing across epochs. | ||||
| 
 | ||||
|  </Accordion> | ||||
| 
 | ||||
| | Name           | Description                                               | | ||||
| | -------------- | --------------------------------------------------------- | | ||||
| | `progress_bar` | Whether the logger should print the progress bar ~~bool~~ | | ||||
| 
 | ||||
| Logging utilities for spaCy are implemented in the | ||||
| [`spacy-loggers`](https://github.com/explosion/spacy-loggers) repo, and the | ||||
| functions are typically available from `@spacy.registry.loggers`. | ||||
|  |  | |||
|  | @ -275,8 +275,8 @@ Render a dependency parse tree or named entity visualization. | |||
| 
 | ||||
| ### displacy.parse_deps {#displacy.parse_deps tag="method" new="2"} | ||||
| 
 | ||||
| Generate dependency parse in `{'words': [], 'arcs': []}` format. | ||||
| For use with the `manual=True` argument in `displacy.render`. | ||||
| Generate dependency parse in `{'words': [], 'arcs': []}` format. For use with | ||||
| the `manual=True` argument in `displacy.render`. | ||||
| 
 | ||||
| > #### Example | ||||
| > | ||||
|  | @ -297,8 +297,8 @@ For use with the `manual=True` argument in `displacy.render`. | |||
| 
 | ||||
| ### displacy.parse_ents {#displacy.parse_ents tag="method" new="2"} | ||||
| 
 | ||||
| Generate named entities in `[{start: i, end: i, label: 'label'}]` format. | ||||
| For use with the `manual=True` argument in `displacy.render`. | ||||
| Generate named entities in `[{start: i, end: i, label: 'label'}]` format. For | ||||
| use with the `manual=True` argument in `displacy.render`. | ||||
| 
 | ||||
| > #### Example | ||||
| > | ||||
|  | @ -319,8 +319,8 @@ For use with the `manual=True` argument in `displacy.render`. | |||
| 
 | ||||
| ### displacy.parse_spans {#displacy.parse_spans tag="method" new="2"} | ||||
| 
 | ||||
| Generate spans in `[{start_token: i, end_token: i, label: 'label'}]` format. | ||||
| For use with the `manual=True` argument in `displacy.render`. | ||||
| Generate spans in `[{start_token: i, end_token: i, label: 'label'}]` format. For | ||||
| use with the `manual=True` argument in `displacy.render`. | ||||
| 
 | ||||
| > #### Example | ||||
| > | ||||
|  | @ -505,7 +505,7 @@ finished. To log each training step, a | |||
| and the accuracy scores on the development set. | ||||
| 
 | ||||
| The built-in, default logger is the ConsoleLogger, which prints results to the | ||||
| console in tabular format. The | ||||
| console in tabular format and can optionally save them to a `jsonl` file. The | ||||
| [spacy-loggers](https://github.com/explosion/spacy-loggers) package, included as | ||||
| a dependency of spaCy, enables other loggers, such as one that sends results to | ||||
| a [Weights & Biases](https://www.wandb.com/) dashboard. | ||||
|  | @ -513,16 +513,20 @@ a [Weights & Biases](https://www.wandb.com/) dashboard. | |||
| Instead of using one of the built-in loggers, you can | ||||
| [implement your own](/usage/training#custom-logging). | ||||
| 
 | ||||
| #### spacy.ConsoleLogger.v1 {#ConsoleLogger tag="registered function"} | ||||
| #### spacy.ConsoleLogger.v2 {#ConsoleLogger tag="registered function"} | ||||
| 
 | ||||
| > #### Example config | ||||
| > | ||||
| > ```ini | ||||
| > [training.logger] | ||||
| > @loggers = "spacy.ConsoleLogger.v1" | ||||
| > @loggers = "spacy.ConsoleLogger.v2" | ||||
| > progress_bar = true | ||||
| > console_output = true | ||||
| > output_file = "training_log.jsonl" | ||||
| > ``` | ||||
| 
 | ||||
| Writes the results of a training step to the console in a tabular format. | ||||
| Writes the results of a training step to the console in a tabular format and, | ||||
| if `output_file` is set, also saves them to a `jsonl` file. | ||||
| 
 | ||||
| <Accordion title="Example console output" spaced> | ||||
| 
 | ||||
|  | @ -536,22 +540,23 @@ $ python -m spacy train config.cfg | |||
| ℹ Pipeline: ['tok2vec', 'tagger'] | ||||
| ℹ Start training | ||||
| ℹ Training. Initial learn rate: 0.0 | ||||
| ℹ Saving results to training_log.jsonl | ||||
| 
 | ||||
| E     #        LOSS TOK2VEC   LOSS TAGGER   TAG_ACC   SCORE | ||||
| ---   ------   ------------   -----------   -------   ------ | ||||
|   1        0           0.00         86.20      0.22     0.00 | ||||
|   1      200           3.08      18968.78     34.00     0.34 | ||||
|   1      400          31.81      22539.06     33.64     0.34 | ||||
|   1      600          92.13      22794.91     43.80     0.44 | ||||
|   1      800         183.62      21541.39     56.05     0.56 | ||||
|   1     1000         352.49      25461.82     65.15     0.65 | ||||
|   1     1200         422.87      23708.82     71.84     0.72 | ||||
|   1     1400         601.92      24994.79     76.57     0.77 | ||||
|   1     1600         662.57      22268.02     80.20     0.80 | ||||
|   1     1800        1101.50      28413.77     82.56     0.83 | ||||
|   1     2000        1253.43      28736.36     85.00     0.85 | ||||
|   1     2200        1411.02      28237.53     87.42     0.87 | ||||
|   1     2400        1605.35      28439.95     88.70     0.89 | ||||
|   0        0           0.00         86.20      0.22     0.00 | ||||
|   0      200           3.08      18968.78     34.00     0.34 | ||||
|   0      400          31.81      22539.06     33.64     0.34 | ||||
|   0      600          92.13      22794.91     43.80     0.44 | ||||
|   0      800         183.62      21541.39     56.05     0.56 | ||||
|   0     1000         352.49      25461.82     65.15     0.65 | ||||
|   0     1200         422.87      23708.82     71.84     0.72 | ||||
|   0     1400         601.92      24994.79     76.57     0.77 | ||||
|   0     1600         662.57      22268.02     80.20     0.80 | ||||
|   0     1800        1101.50      28413.77     82.56     0.83 | ||||
|   0     2000        1253.43      28736.36     85.00     0.85 | ||||
|   0     2200        1411.02      28237.53     87.42     0.87 | ||||
|   0     2400        1605.35      28439.95     88.70     0.89 | ||||
| ``` | ||||
| 
 | ||||
| Note that the cumulative loss keeps increasing within one epoch, but should | ||||
|  | @ -559,6 +564,12 @@ start decreasing across epochs. | |||
| 
 | ||||
|  </Accordion> | ||||
| 
 | ||||
| | Name             | Description                                                           | | ||||
| | ---------------- | --------------------------------------------------------------------- | | ||||
| `progress_bar`   | Whether the logger should print the progress bar. ~~bool~~            | | ||||
| | `console_output` | Whether the logger should print the logs on the console. ~~bool~~     | | ||||
| | `output_file`    | The file to save the training logs to. ~~Optional[Union[str, Path]]~~ | | ||||
| 
 | ||||
| ## Readers {#readers} | ||||
| 
 | ||||
| ### File readers {#file-readers source="github.com/explosion/srsly" new="3"} | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user