mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-25 13:11:03 +03:00 
			
		
		
		
	* Add initial design for diff command For now, the diffing process looks like this: - The default config is created based on some values in the user config (e.g. which pipeline components were used, the lang, etc.) - The user must manually specify whether it was optimized for acc/efficiency and whether pretraining was involved. * Make diff command structure similar to siblings * Include gpu as a user option for CLI * Make variables more explicit * Fix type declaration for optimize enum * Improve docstrings for diff CLI * Add debug-diff to website API docs * Switch position of configs so that user config is modded * Add markdown flag for debug diff This commit adds a --markdown (--md) flag that allows easier copy-pasting to GitHub issues. Please note that this commit is dependent on an unreleased version of wasabi (for the time being). For posterity, the related PR is found here: https://github.com/ines/wasabi/pull/20 * Bump version of wasabi to 0.9.1 So that we can use the add_symbols parameter. * Apply suggestions from code review Co-authored-by: Ines Montani <ines@ines.io> * Update docs based on code review suggestions Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com> * Change command name from diff -> diff-config * Clarify when options are relevant or not * Rerun prettier on cli.md Co-authored-by: Ines Montani <ines@ines.io> Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
		
			
				
	
	
		
			90 lines
		
	
	
		
			3.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			90 lines
		
	
	
		
			3.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| from typing import Optional
 | |
| 
 | |
| import typer
 | |
| from wasabi import Printer, diff_strings, MarkdownRenderer
 | |
| from pathlib import Path
 | |
| from thinc.api import Config
 | |
| 
 | |
| from ._util import debug_cli, Arg, Opt, show_validation_error, parse_config_overrides
 | |
| from ..util import load_config
 | |
| from .init_config import init_config, Optimizations
 | |
| 
 | |
| 
 | |
@debug_cli.command(
    "diff-config",
    context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
)
def debug_diff_cli(
    # fmt: off
    ctx: typer.Context,
    config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True),
    compare_to: Optional[Path] = Opt(None, help="Path to a config file to diff against, or `None` to compare against default settings", exists=True, allow_dash=True),
    optimize: Optimizations = Opt(Optimizations.efficiency.value, "--optimize", "-o", help="Whether the user config was optimized for efficiency or accuracy. Only relevant when comparing against the default config."),
    gpu: bool = Opt(False, "--gpu", "-G", help="Whether the original config can run on a GPU. Only relevant when comparing against the default config."),
    pretraining: bool = Opt(False, "--pretraining", "--pt", help="Whether to compare on a config with pretraining involved. Only relevant when comparing against the default config."),
    markdown: bool = Opt(False, "--markdown", "-md", help="Generate Markdown for GitHub issues")
    # fmt: on
):
    """Show a diff of a config file with respect to spaCy's defaults or another config file. If
    additional settings were used in the creation of the config file, then you
    must supply these as extra parameters to the command when comparing to the default settings. The generated diff
    can also be used when posting to the discussion forum to provide more
    information for the maintainers.

    The `optimize`, `gpu`, and `pretraining` options are only relevant when
    comparing against the default configuration (or specifically when `compare_to` is None).

    DOCS: https://spacy.io/api/cli#debug-diff
    """
    # Thin CLI shim: every parsed option is handed straight to `debug_diff`,
    # which does the actual work. `ctx` is part of the command signature but
    # is not read here.
    debug_diff(
        config_path=config_path,
        compare_to=compare_to,
        gpu=gpu,
        optimize=optimize,
        pretraining=pretraining,
        markdown=markdown,
    )
 | |
| 
 | |
| 
 | |
def debug_diff(
    config_path: Path,
    compare_to: Optional[Path],
    gpu: bool,
    optimize: Optimizations,
    pretraining: bool,
    markdown: bool,
):
    """Print a diff between the user's config and a reference config.

    config_path (Path): Config file to load as the user's config.
    compare_to (Optional[Path]): Config file to diff against. When falsy, a
        reference config is generated with `init_config` from the language
        and pipeline found in the user's config.
    gpu (bool): Passed to `init_config` when generating the reference.
    optimize (Optimizations): Its `.value` is passed to `init_config` when
        generating the reference.
    pretraining (bool): Passed to `init_config` when generating the reference.
    markdown (bool): If True, request diff symbols from `diff_strings` and
        print the result wrapped in a Markdown "diff" code block.
    """
    printer = Printer()
    with show_validation_error(hint_fill=False):
        user_config = load_config(config_path)
        if not compare_to:
            # No explicit target: rebuild a default config from what the
            # user's config declares for its language and pipeline.
            nlp_section = user_config["nlp"]
            lang = nlp_section["lang"]
            pipeline = list(nlp_section["pipeline"])
            printer.info(f"Found user-defined language: '{lang}'")
            printer.info(f"Found user-defined pipelines: {pipeline}")
            reference_config = init_config(
                lang=lang,
                pipeline=pipeline,
                optimize=optimize.value,
                gpu=gpu,
                pretraining=pretraining,
                silent=True,
            )
        else:
            reference_config = load_config(compare_to)

    user_text = user_config.to_str()
    reference_text = reference_config.to_str()

    if user_text == reference_text:
        printer.warn("No diff to show: configs are identical")
        return

    # The reference goes first so the user's config is shown as the modified side.
    diff_text = diff_strings(reference_text, user_text, add_symbols=markdown)
    if not markdown:
        print(diff_text)
        return

    renderer = MarkdownRenderer()
    renderer.add(renderer.code_block(diff_text, "diff"))
    print(renderer.text)
 |