Mirror of https://github.com/explosion/spaCy.git (synced 2025-10-25 21:21:10 +03:00)
			
		
		
		
Add support for multiple code files to all relevant commands

Prior to this, only the package command supported multiple code files.
This commit is contained in:
		
Parent: 2c2e66e145
Commit: 05af71cc39
					
				|  | @ -318,6 +318,13 @@ def show_validation_error( | ||||||
|         msg.fail("Config validation error", e, exits=1) |         msg.fail("Config validation error", e, exits=1) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | def import_code_paths(code_paths: str) -> None: | ||||||
|  |     """Helper to import comma-separated list of code paths.""" | ||||||
|  |     code_paths = [Path(p.strip()) for p in string_to_list(code_paths)] | ||||||
|  |     for code_path in code_paths: | ||||||
|  |         import_code(code_path) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| def import_code(code_path: Optional[Union[Path, str]]) -> None: | def import_code(code_path: Optional[Union[Path, str]]) -> None: | ||||||
|     """Helper to import Python file provided in training commands / commands |     """Helper to import Python file provided in training commands / commands | ||||||
|     using the config. This makes custom registered functions available. |     using the config. This makes custom registered functions available. | ||||||
|  |  | ||||||
|  | @ -5,7 +5,7 @@ import typer | ||||||
| import logging | import logging | ||||||
| 
 | 
 | ||||||
| from ._util import app, Arg, Opt, parse_config_overrides, show_validation_error | from ._util import app, Arg, Opt, parse_config_overrides, show_validation_error | ||||||
| from ._util import import_code | from ._util import import_code_paths | ||||||
| from .. import util | from .. import util | ||||||
| from ..util import get_sourced_components, load_model_from_config | from ..util import get_sourced_components, load_model_from_config | ||||||
| 
 | 
 | ||||||
|  | @ -19,7 +19,7 @@ def assemble_cli( | ||||||
|     ctx: typer.Context,  # This is only used to read additional arguments |     ctx: typer.Context,  # This is only used to read additional arguments | ||||||
|     config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True), |     config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True), | ||||||
|     output_path: Path = Arg(..., help="Output directory to store assembled pipeline in"), |     output_path: Path = Arg(..., help="Output directory to store assembled pipeline in"), | ||||||
|     code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"), |     code_paths: str = Opt("", "--code", "-c", help="Comma-separated paths to Python files with additional code (registered functions) to be included in the package"), | ||||||
|     verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"), |     verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"), | ||||||
|     # fmt: on |     # fmt: on | ||||||
| ): | ): | ||||||
|  | @ -38,7 +38,7 @@ def assemble_cli( | ||||||
|     if not config_path or (str(config_path) != "-" and not config_path.exists()): |     if not config_path or (str(config_path) != "-" and not config_path.exists()): | ||||||
|         msg.fail("Config file not found", config_path, exits=1) |         msg.fail("Config file not found", config_path, exits=1) | ||||||
|     overrides = parse_config_overrides(ctx.args) |     overrides = parse_config_overrides(ctx.args) | ||||||
|     import_code(code_path) |     import_code_paths(code_paths) | ||||||
|     with show_validation_error(config_path): |     with show_validation_error(config_path): | ||||||
|         config = util.load_config(config_path, overrides=overrides, interpolate=False) |         config = util.load_config(config_path, overrides=overrides, interpolate=False) | ||||||
|     msg.divider("Initializing pipeline") |     msg.divider("Initializing pipeline") | ||||||
|  |  | ||||||
|  | @ -6,7 +6,7 @@ from thinc.config import VARIABLE_RE | ||||||
| import typer | import typer | ||||||
| 
 | 
 | ||||||
| from ._util import Arg, Opt, show_validation_error, parse_config_overrides | from ._util import Arg, Opt, show_validation_error, parse_config_overrides | ||||||
| from ._util import import_code, debug_cli | from ._util import import_code_paths, debug_cli | ||||||
| from ..schemas import ConfigSchemaInit, ConfigSchemaTraining | from ..schemas import ConfigSchemaInit, ConfigSchemaTraining | ||||||
| from ..util import registry | from ..util import registry | ||||||
| from .. import util | from .. import util | ||||||
|  | @ -20,7 +20,7 @@ def debug_config_cli( | ||||||
|     # fmt: off |     # fmt: off | ||||||
|     ctx: typer.Context,  # This is only used to read additional arguments |     ctx: typer.Context,  # This is only used to read additional arguments | ||||||
|     config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True), |     config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True), | ||||||
|     code_path: Optional[Path] = Opt(None, "--code-path", "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"), |     code_paths: str = Opt("", "--code", "-c", help="Comma-separated paths to Python files with additional code (registered functions) to be included in the package"), | ||||||
|     show_funcs: bool = Opt(False, "--show-functions", "-F", help="Show an overview of all registered functions used in the config and where they come from (modules, files etc.)"), |     show_funcs: bool = Opt(False, "--show-functions", "-F", help="Show an overview of all registered functions used in the config and where they come from (modules, files etc.)"), | ||||||
|     show_vars: bool = Opt(False, "--show-variables", "-V", help="Show an overview of all variables referenced in the config and their values. This will also reflect variables overwritten on the CLI.") |     show_vars: bool = Opt(False, "--show-variables", "-V", help="Show an overview of all variables referenced in the config and their values. This will also reflect variables overwritten on the CLI.") | ||||||
|     # fmt: on |     # fmt: on | ||||||
|  | @ -37,7 +37,7 @@ def debug_config_cli( | ||||||
|     DOCS: https://spacy.io/api/cli#debug-config |     DOCS: https://spacy.io/api/cli#debug-config | ||||||
|     """ |     """ | ||||||
|     overrides = parse_config_overrides(ctx.args) |     overrides = parse_config_overrides(ctx.args) | ||||||
|     import_code(code_path) |     import_code_paths(code_paths) | ||||||
|     debug_config( |     debug_config( | ||||||
|         config_path, overrides=overrides, show_funcs=show_funcs, show_vars=show_vars |         config_path, overrides=overrides, show_funcs=show_funcs, show_vars=show_vars | ||||||
|     ) |     ) | ||||||
|  |  | ||||||
|  | @ -9,7 +9,7 @@ import typer | ||||||
| import math | import math | ||||||
| 
 | 
 | ||||||
| from ._util import app, Arg, Opt, show_validation_error, parse_config_overrides | from ._util import app, Arg, Opt, show_validation_error, parse_config_overrides | ||||||
| from ._util import import_code, debug_cli, _format_number | from ._util import import_code_paths, debug_cli, _format_number | ||||||
| from ..training import Example, remove_bilu_prefix | from ..training import Example, remove_bilu_prefix | ||||||
| from ..training.initialize import get_sourced_components | from ..training.initialize import get_sourced_components | ||||||
| from ..schemas import ConfigSchemaTraining | from ..schemas import ConfigSchemaTraining | ||||||
|  | @ -52,7 +52,7 @@ def debug_data_cli( | ||||||
|     # fmt: off |     # fmt: off | ||||||
|     ctx: typer.Context,  # This is only used to read additional arguments |     ctx: typer.Context,  # This is only used to read additional arguments | ||||||
|     config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True), |     config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True), | ||||||
|     code_path: Optional[Path] = Opt(None, "--code-path", "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"), |     code_paths: str = Opt("", "--code", "-c", help="Comma-separated paths to Python files with additional code (registered functions) to be included in the package"), | ||||||
|     ignore_warnings: bool = Opt(False, "--ignore-warnings", "-IW", help="Ignore warnings, only show stats and errors"), |     ignore_warnings: bool = Opt(False, "--ignore-warnings", "-IW", help="Ignore warnings, only show stats and errors"), | ||||||
|     verbose: bool = Opt(False, "--verbose", "-V", help="Print additional information and explanations"), |     verbose: bool = Opt(False, "--verbose", "-V", help="Print additional information and explanations"), | ||||||
|     no_format: bool = Opt(False, "--no-format", "-NF", help="Don't pretty-print the results"), |     no_format: bool = Opt(False, "--no-format", "-NF", help="Don't pretty-print the results"), | ||||||
|  | @ -72,7 +72,7 @@ def debug_data_cli( | ||||||
|             "--help for an overview of the other available debugging commands." |             "--help for an overview of the other available debugging commands." | ||||||
|         ) |         ) | ||||||
|     overrides = parse_config_overrides(ctx.args) |     overrides = parse_config_overrides(ctx.args) | ||||||
|     import_code(code_path) |     import_code_paths(code_paths) | ||||||
|     debug_data( |     debug_data( | ||||||
|         config_path, |         config_path, | ||||||
|         config_overrides=overrides, |         config_overrides=overrides, | ||||||
|  |  | ||||||
|  | @ -7,7 +7,7 @@ from thinc.api import fix_random_seed | ||||||
| 
 | 
 | ||||||
| from ..training import Corpus | from ..training import Corpus | ||||||
| from ..tokens import Doc | from ..tokens import Doc | ||||||
| from ._util import app, Arg, Opt, setup_gpu, import_code | from ._util import app, Arg, Opt, setup_gpu, import_code_paths | ||||||
| from ..scorer import Scorer | from ..scorer import Scorer | ||||||
| from .. import util | from .. import util | ||||||
| from .. import displacy | from .. import displacy | ||||||
|  | @ -19,7 +19,7 @@ def evaluate_cli( | ||||||
|     model: str = Arg(..., help="Model name or path"), |     model: str = Arg(..., help="Model name or path"), | ||||||
|     data_path: Path = Arg(..., help="Location of binary evaluation data in .spacy format", exists=True), |     data_path: Path = Arg(..., help="Location of binary evaluation data in .spacy format", exists=True), | ||||||
|     output: Optional[Path] = Opt(None, "--output", "-o", help="Output JSON file for metrics", dir_okay=False), |     output: Optional[Path] = Opt(None, "--output", "-o", help="Output JSON file for metrics", dir_okay=False), | ||||||
|     code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"), |     code_paths: str = Opt("", "--code", "-c", help="Comma-separated paths to Python files with additional code (registered functions) to be included in the package"), | ||||||
|     use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"), |     use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"), | ||||||
|     gold_preproc: bool = Opt(False, "--gold-preproc", "-G", help="Use gold preprocessing"), |     gold_preproc: bool = Opt(False, "--gold-preproc", "-G", help="Use gold preprocessing"), | ||||||
|     displacy_path: Optional[Path] = Opt(None, "--displacy-path", "-dp", help="Directory to output rendered parses as HTML", exists=True, file_okay=False), |     displacy_path: Optional[Path] = Opt(None, "--displacy-path", "-dp", help="Directory to output rendered parses as HTML", exists=True, file_okay=False), | ||||||
|  | @ -38,7 +38,7 @@ def evaluate_cli( | ||||||
| 
 | 
 | ||||||
|     DOCS: https://spacy.io/api/cli#evaluate |     DOCS: https://spacy.io/api/cli#evaluate | ||||||
|     """ |     """ | ||||||
|     import_code(code_path) |     import_code_paths(code_paths) | ||||||
|     evaluate( |     evaluate( | ||||||
|         model, |         model, | ||||||
|         data_path, |         data_path, | ||||||
|  |  | ||||||
|  | @ -20,7 +20,7 @@ def package_cli( | ||||||
|     # fmt: off |     # fmt: off | ||||||
|     input_dir: Path = Arg(..., help="Directory with pipeline data", exists=True, file_okay=False), |     input_dir: Path = Arg(..., help="Directory with pipeline data", exists=True, file_okay=False), | ||||||
|     output_dir: Path = Arg(..., help="Output parent directory", exists=True, file_okay=False), |     output_dir: Path = Arg(..., help="Output parent directory", exists=True, file_okay=False), | ||||||
|     code_paths: str = Opt("", "--code", "-c", help="Comma-separated paths to Python file with additional code (registered functions) to be included in the package"), |     code_paths: str = Opt("", "--code", "-c", help="Comma-separated paths to Python files with additional code (registered functions) to be included in the package"), | ||||||
|     meta_path: Optional[Path] = Opt(None, "--meta-path", "--meta", "-m", help="Path to meta.json", exists=True, dir_okay=False), |     meta_path: Optional[Path] = Opt(None, "--meta-path", "--meta", "-m", help="Path to meta.json", exists=True, dir_okay=False), | ||||||
|     create_meta: bool = Opt(False, "--create-meta", "-C", help="Create meta.json, even if one exists"), |     create_meta: bool = Opt(False, "--create-meta", "-C", help="Create meta.json, even if one exists"), | ||||||
|     name: Optional[str] = Opt(None, "--name", "-n", help="Package name to override meta"), |     name: Optional[str] = Opt(None, "--name", "-n", help="Package name to override meta"), | ||||||
|  |  | ||||||
|  | @ -5,7 +5,7 @@ import typer | ||||||
| import re | import re | ||||||
| 
 | 
 | ||||||
| from ._util import app, Arg, Opt, parse_config_overrides, show_validation_error | from ._util import app, Arg, Opt, parse_config_overrides, show_validation_error | ||||||
| from ._util import import_code, setup_gpu | from ._util import import_code_paths, setup_gpu | ||||||
| from ..training.pretrain import pretrain | from ..training.pretrain import pretrain | ||||||
| from ..util import load_config | from ..util import load_config | ||||||
| 
 | 
 | ||||||
|  | @ -19,7 +19,7 @@ def pretrain_cli( | ||||||
|     ctx: typer.Context,  # This is only used to read additional arguments |     ctx: typer.Context,  # This is only used to read additional arguments | ||||||
|     config_path: Path = Arg(..., help="Path to config file", exists=True, dir_okay=False, allow_dash=True), |     config_path: Path = Arg(..., help="Path to config file", exists=True, dir_okay=False, allow_dash=True), | ||||||
|     output_dir: Path = Arg(..., help="Directory to write weights to on each epoch"), |     output_dir: Path = Arg(..., help="Directory to write weights to on each epoch"), | ||||||
|     code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"), |     code_paths: str = Opt("", "--code", "-c", help="Comma-separated paths to Python files with additional code (registered functions) to be included in the package"), | ||||||
|     resume_path: Optional[Path] = Opt(None, "--resume-path", "-r", help="Path to pretrained weights from which to resume pretraining"), |     resume_path: Optional[Path] = Opt(None, "--resume-path", "-r", help="Path to pretrained weights from which to resume pretraining"), | ||||||
|     epoch_resume: Optional[int] = Opt(None, "--epoch-resume", "-er", help="The epoch to resume counting from when using --resume-path. Prevents unintended overwriting of existing weight files."), |     epoch_resume: Optional[int] = Opt(None, "--epoch-resume", "-er", help="The epoch to resume counting from when using --resume-path. Prevents unintended overwriting of existing weight files."), | ||||||
|     use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"), |     use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"), | ||||||
|  | @ -47,7 +47,7 @@ def pretrain_cli( | ||||||
|     DOCS: https://spacy.io/api/cli#pretrain |     DOCS: https://spacy.io/api/cli#pretrain | ||||||
|     """ |     """ | ||||||
|     config_overrides = parse_config_overrides(ctx.args) |     config_overrides = parse_config_overrides(ctx.args) | ||||||
|     import_code(code_path) |     import_code_paths(code_paths) | ||||||
|     verify_cli_args(config_path, output_dir, resume_path, epoch_resume) |     verify_cli_args(config_path, output_dir, resume_path, epoch_resume) | ||||||
|     setup_gpu(use_gpu) |     setup_gpu(use_gpu) | ||||||
|     msg.info(f"Loading config from: {config_path}") |     msg.info(f"Loading config from: {config_path}") | ||||||
|  |  | ||||||
|  | @ -6,7 +6,7 @@ import logging | ||||||
| import sys | import sys | ||||||
| 
 | 
 | ||||||
| from ._util import app, Arg, Opt, parse_config_overrides, show_validation_error | from ._util import app, Arg, Opt, parse_config_overrides, show_validation_error | ||||||
| from ._util import import_code, setup_gpu | from ._util import import_code_paths, setup_gpu | ||||||
| from ..training.loop import train as train_nlp | from ..training.loop import train as train_nlp | ||||||
| from ..training.initialize import init_nlp | from ..training.initialize import init_nlp | ||||||
| from .. import util | from .. import util | ||||||
|  | @ -20,7 +20,7 @@ def train_cli( | ||||||
|     ctx: typer.Context,  # This is only used to read additional arguments |     ctx: typer.Context,  # This is only used to read additional arguments | ||||||
|     config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True), |     config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True), | ||||||
|     output_path: Optional[Path] = Opt(None, "--output", "--output-path", "-o", help="Output directory to store trained pipeline in"), |     output_path: Optional[Path] = Opt(None, "--output", "--output-path", "-o", help="Output directory to store trained pipeline in"), | ||||||
|     code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"), |     code_paths: str = Opt("", "--code", "-c", help="Comma-separated paths to Python files with additional code (registered functions) to be included in the package"), | ||||||
|     verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"), |     verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"), | ||||||
|     use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU") |     use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU") | ||||||
|     # fmt: on |     # fmt: on | ||||||
|  | @ -41,7 +41,7 @@ def train_cli( | ||||||
|     """ |     """ | ||||||
|     util.logger.setLevel(logging.DEBUG if verbose else logging.INFO) |     util.logger.setLevel(logging.DEBUG if verbose else logging.INFO) | ||||||
|     overrides = parse_config_overrides(ctx.args) |     overrides = parse_config_overrides(ctx.args) | ||||||
|     import_code(code_path) |     import_code_paths(code_paths) | ||||||
|     train(config_path, output_path, use_gpu=use_gpu, overrides=overrides) |     train(config_path, output_path, use_gpu=use_gpu, overrides=overrides) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user