mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-25 21:21:10 +03:00 
			
		
		
		
	Improve CLI config validation with latest Thinc
This commit is contained in:
		
							parent
							
								
									702edf52a0
								
							
						
					
					
						commit
						ca3c997062
					
				|  | @ -6,7 +6,7 @@ requires = [ | ||||||
|     "cymem>=2.0.2,<2.1.0", |     "cymem>=2.0.2,<2.1.0", | ||||||
|     "preshed>=3.0.2,<3.1.0", |     "preshed>=3.0.2,<3.1.0", | ||||||
|     "murmurhash>=0.28.0,<1.1.0", |     "murmurhash>=0.28.0,<1.1.0", | ||||||
|     "thinc>=8.0.0a35,<8.0.0a40", |     "thinc>=8.0.0a36,<8.0.0a40", | ||||||
|     "blis>=0.4.0,<0.5.0", |     "blis>=0.4.0,<0.5.0", | ||||||
|     "pytokenizations", |     "pytokenizations", | ||||||
|     "pathy" |     "pathy" | ||||||
|  |  | ||||||
|  | @ -1,7 +1,7 @@ | ||||||
| # Our libraries | # Our libraries | ||||||
| cymem>=2.0.2,<2.1.0 | cymem>=2.0.2,<2.1.0 | ||||||
| preshed>=3.0.2,<3.1.0 | preshed>=3.0.2,<3.1.0 | ||||||
| thinc>=8.0.0a35,<8.0.0a40 | thinc>=8.0.0a36,<8.0.0a40 | ||||||
| blis>=0.4.0,<0.5.0 | blis>=0.4.0,<0.5.0 | ||||||
| ml_datasets==0.2.0a0 | ml_datasets==0.2.0a0 | ||||||
| murmurhash>=0.28.0,<1.1.0 | murmurhash>=0.28.0,<1.1.0 | ||||||
|  |  | ||||||
|  | @ -34,13 +34,13 @@ setup_requires = | ||||||
|     cymem>=2.0.2,<2.1.0 |     cymem>=2.0.2,<2.1.0 | ||||||
|     preshed>=3.0.2,<3.1.0 |     preshed>=3.0.2,<3.1.0 | ||||||
|     murmurhash>=0.28.0,<1.1.0 |     murmurhash>=0.28.0,<1.1.0 | ||||||
|     thinc>=8.0.0a35,<8.0.0a40 |     thinc>=8.0.0a36,<8.0.0a40 | ||||||
| install_requires = | install_requires = | ||||||
|     # Our libraries |     # Our libraries | ||||||
|     murmurhash>=0.28.0,<1.1.0 |     murmurhash>=0.28.0,<1.1.0 | ||||||
|     cymem>=2.0.2,<2.1.0 |     cymem>=2.0.2,<2.1.0 | ||||||
|     preshed>=3.0.2,<3.1.0 |     preshed>=3.0.2,<3.1.0 | ||||||
|     thinc>=8.0.0a35,<8.0.0a40 |     thinc>=8.0.0a36,<8.0.0a40 | ||||||
|     blis>=0.4.0,<0.5.0 |     blis>=0.4.0,<0.5.0 | ||||||
|     wasabi>=0.8.0,<1.1.0 |     wasabi>=0.8.0,<1.1.0 | ||||||
|     srsly>=2.1.0,<3.0.0 |     srsly>=2.1.0,<3.0.0 | ||||||
|  |  | ||||||
|  | @ -10,7 +10,7 @@ from click import NoSuchOption | ||||||
| from click.parser import split_arg_string | from click.parser import split_arg_string | ||||||
| from typer.main import get_command | from typer.main import get_command | ||||||
| from contextlib import contextmanager | from contextlib import contextmanager | ||||||
| from thinc.config import Config, ConfigValidationError | from thinc.api import Config, ConfigValidationError | ||||||
| from configparser import InterpolationError | from configparser import InterpolationError | ||||||
| import os | import os | ||||||
| 
 | 
 | ||||||
|  | @ -226,24 +226,28 @@ def get_checksum(path: Union[Path, str]) -> str: | ||||||
| def show_validation_error( | def show_validation_error( | ||||||
|     file_path: Optional[Union[str, Path]] = None, |     file_path: Optional[Union[str, Path]] = None, | ||||||
|     *, |     *, | ||||||
|     title: str = "Config validation error", |     title: Optional[str] = None, | ||||||
|  |     desc: str = "", | ||||||
|  |     show_config: Optional[bool] = None, | ||||||
|     hint_fill: bool = True, |     hint_fill: bool = True, | ||||||
| ): | ): | ||||||
|     """Helper to show custom config validation errors on the CLI. |     """Helper to show custom config validation errors on the CLI. | ||||||
| 
 | 
 | ||||||
|     file_path (str / Path): Optional file path of config file, used in hints. |     file_path (str / Path): Optional file path of config file, used in hints. | ||||||
|     title (str): Title of the custom formatted error. |     title (str): Override title of custom formatted error. | ||||||
|  |     desc (str): Override description of custom formatted error. | ||||||
|  |     show_config (bool): Whether to output the config the error refers to. | ||||||
|     hint_fill (bool): Show hint about filling config. |     hint_fill (bool): Show hint about filling config. | ||||||
|     """ |     """ | ||||||
|     try: |     try: | ||||||
|         yield |         yield | ||||||
|     except (ConfigValidationError, InterpolationError) as e: |     except ConfigValidationError as e: | ||||||
|         msg.fail(title, spaced=True) |         title = title if title is not None else e.title | ||||||
|         # TODO: This is kinda hacky and we should probably provide a better |         # Re-generate a new error object with overrides | ||||||
|         # helper for this in Thinc |         err = e.from_error(e, title="", desc=desc, show_config=show_config) | ||||||
|         err_text = str(e).replace("Config validation error", "").strip() |         msg.fail(title) | ||||||
|         print(err_text) |         print(err.text.strip()) | ||||||
|         if hint_fill and "field required" in err_text: |         if hint_fill and "value_error.missing" in err.error_types: | ||||||
|             config_path = file_path if file_path is not None else "config.cfg" |             config_path = file_path if file_path is not None else "config.cfg" | ||||||
|             msg.text( |             msg.text( | ||||||
|                 "If your config contains missing values, you can run the 'init " |                 "If your config contains missing values, you can run the 'init " | ||||||
|  | @ -252,6 +256,8 @@ def show_validation_error( | ||||||
|             ) |             ) | ||||||
|             print(f"{COMMAND} init fill-config {config_path} --base {config_path}\n") |             print(f"{COMMAND} init fill-config {config_path} --base {config_path}\n") | ||||||
|         sys.exit(1) |         sys.exit(1) | ||||||
|  |     except InterpolationError as e: | ||||||
|  |         msg.fail("Config validation error", e, exits=1) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def import_code(code_path: Optional[Union[Path, str]]) -> None: | def import_code(code_path: Optional[Union[Path, str]]) -> None: | ||||||
|  |  | ||||||
|  | @ -1,8 +1,8 @@ | ||||||
| from typing import Optional, Dict, Any, Union, List | from typing import Optional, Dict, Any, Union, List | ||||||
| from pathlib import Path | from pathlib import Path | ||||||
| from wasabi import msg, table | from wasabi import msg, table | ||||||
| from thinc.api import Config | from thinc.api import Config, ConfigValidationError | ||||||
| from thinc.config import VARIABLE_RE, ConfigValidationError | from thinc.config import VARIABLE_RE | ||||||
| import typer | import typer | ||||||
| 
 | 
 | ||||||
| from ._util import Arg, Opt, show_validation_error, parse_config_overrides | from ._util import Arg, Opt, show_validation_error, parse_config_overrides | ||||||
|  | @ -115,4 +115,4 @@ def check_section_refs(config: Config, fields: List[str]) -> None: | ||||||
|             msg = f"not a valid section reference: {value}" |             msg = f"not a valid section reference: {value}" | ||||||
|             errors.append({"loc": field.split("."), "msg": msg}) |             errors.append({"loc": field.split("."), "msg": msg}) | ||||||
|     if errors: |     if errors: | ||||||
|         raise ConfigValidationError(config, errors) |         raise ConfigValidationError(config=config, errors=errors) | ||||||
|  |  | ||||||
|  | @ -89,7 +89,7 @@ def train( | ||||||
|         nlp, config = util.load_model_from_config(config) |         nlp, config = util.load_model_from_config(config) | ||||||
|     util.load_vocab_data_into_model(nlp, lookups=config["training"]["lookups"]) |     util.load_vocab_data_into_model(nlp, lookups=config["training"]["lookups"]) | ||||||
|     if config["training"]["vectors"] is not None: |     if config["training"]["vectors"] is not None: | ||||||
|         util.load_vectors_into_model(nlp, config["training"]["vectors"]) |         add_vectors(nlp, config["training"]["vectors"]) | ||||||
|     raw_text, tag_map, morph_rules, weights_data = load_from_paths(config) |     raw_text, tag_map, morph_rules, weights_data = load_from_paths(config) | ||||||
|     T_cfg = config["training"] |     T_cfg = config["training"] | ||||||
|     optimizer = T_cfg["optimizer"] |     optimizer = T_cfg["optimizer"] | ||||||
|  | @ -195,6 +195,19 @@ def train( | ||||||
|             msg.good(f"Saved pipeline to output directory {final_model_path}") |             msg.good(f"Saved pipeline to output directory {final_model_path}") | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | def add_vectors(nlp: Language, vectors: str) -> None: | ||||||
|  |     title = f"Config validation error for vectors {vectors}" | ||||||
|  |     desc = ( | ||||||
|  |         "This typically means that there's a problem in the config.cfg included " | ||||||
|  |         "with the packaged vectors. Make sure that the vectors package you're " | ||||||
|  |         "loading is compatible with the current version of spaCy." | ||||||
|  |     ) | ||||||
|  |     with show_validation_error( | ||||||
|  |         title=title, desc=desc, hint_fill=False, show_config=False | ||||||
|  |     ): | ||||||
|  |         util.load_vectors_into_model(nlp, vectors) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| def create_train_batches(iterator, batcher, max_epochs: int): | def create_train_batches(iterator, batcher, max_epochs: int): | ||||||
|     epoch = 0 |     epoch = 0 | ||||||
|     examples = list(iterator) |     examples = list(iterator) | ||||||
|  |  | ||||||
|  | @ -1,6 +1,6 @@ | ||||||
| import pytest | import pytest | ||||||
| from spacy.lang.zh import Chinese, _get_pkuseg_trie_data | from spacy.lang.zh import Chinese, _get_pkuseg_trie_data | ||||||
| from thinc.config import ConfigValidationError | from thinc.api import ConfigValidationError | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| # fmt: off | # fmt: off | ||||||
|  |  | ||||||
|  | @ -4,8 +4,7 @@ from spacy.lang.en import English | ||||||
| from spacy.lang.de import German | from spacy.lang.de import German | ||||||
| from spacy.tokens import Doc | from spacy.tokens import Doc | ||||||
| from spacy.util import registry, SimpleFrozenDict, combine_score_weights | from spacy.util import registry, SimpleFrozenDict, combine_score_weights | ||||||
| from thinc.api import Model, Linear | from thinc.api import Model, Linear, ConfigValidationError | ||||||
| from thinc.config import ConfigValidationError |  | ||||||
| from pydantic import StrictInt, StrictStr | from pydantic import StrictInt, StrictStr | ||||||
| 
 | 
 | ||||||
| from ..util import make_tempdir | from ..util import make_tempdir | ||||||
|  |  | ||||||
|  | @ -1,5 +1,5 @@ | ||||||
| import pytest | import pytest | ||||||
| from thinc.config import Config, ConfigValidationError | from thinc.api import Config, ConfigValidationError | ||||||
| import spacy | import spacy | ||||||
| from spacy.lang.en import English | from spacy.lang.en import English | ||||||
| from spacy.lang.de import German | from spacy.lang.de import German | ||||||
|  |  | ||||||
|  | @ -8,7 +8,7 @@ from spacy.cli._util import validate_project_commands, parse_config_overrides | ||||||
| from spacy.cli._util import load_project_config, substitute_project_variables | from spacy.cli._util import load_project_config, substitute_project_variables | ||||||
| from spacy.cli._util import string_to_list, OVERRIDES_ENV_VAR | from spacy.cli._util import string_to_list, OVERRIDES_ENV_VAR | ||||||
| from spacy.cli.debug_config import check_section_refs | from spacy.cli.debug_config import check_section_refs | ||||||
| from thinc.config import ConfigValidationError, Config | from thinc.api import ConfigValidationError, Config | ||||||
| import srsly | import srsly | ||||||
| import os | import os | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user