From 9fde2580538967dc16f63d4c3bc55660d031d09e Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Mon, 21 Jun 2021 09:39:22 +0200 Subject: [PATCH] Use minor version for compatibility check (#8403) * Use minor version for compatibility check * Use minor version of compatibility table * Soften warning message about incompatible models * Add test for presence of current version in compatibility table * Add test for download compatibility table * Use minor version of lower pin in error message if possible * Fall back to spacy_git_version if available * Fix unknown version string --- spacy/cli/download.py | 4 ++-- spacy/cli/validate.py | 18 ++++++++++++------ spacy/errors.py | 12 ++++++------ spacy/tests/test_cli.py | 33 ++++++++++++++++++++++++++++++--- spacy/util.py | 23 ++++++++++++++++++++++- 5 files changed, 72 insertions(+), 18 deletions(-) diff --git a/spacy/cli/download.py b/spacy/cli/download.py index d09d5147a..4ea9a8f0e 100644 --- a/spacy/cli/download.py +++ b/spacy/cli/download.py @@ -6,7 +6,7 @@ import typer from ._util import app, Arg, Opt, WHEEL_SUFFIX, SDIST_SUFFIX from .. import about -from ..util import is_package, get_base_version, run_command +from ..util import is_package, get_minor_version, run_command from ..errors import OLD_MODEL_SHORTCUTS @@ -74,7 +74,7 @@ def download(model: str, direct: bool = False, sdist: bool = False, *pip_args) - def get_compatibility() -> dict: - version = get_base_version(about.__version__) + version = get_minor_version(about.__version__) r = requests.get(about.__compatibility__) if r.status_code != 200: msg.fail( diff --git a/spacy/cli/validate.py b/spacy/cli/validate.py index 67fc16383..41c4cfb72 100644 --- a/spacy/cli/validate.py +++ b/spacy/cli/validate.py @@ -3,10 +3,12 @@ from pathlib import Path import sys import requests from wasabi import msg, Printer +import warnings +from ..errors import Warnings from ._util import app from .. import about -from ..util import get_package_version, get_installed_models, get_base_version +from ..util import get_package_version, get_installed_models, get_minor_version from ..util import get_package_path, get_model_meta, is_compatible_version @@ -24,7 +26,7 @@ def validate_cli(): def validate() -> None: model_pkgs, compat = get_model_pkgs() - spacy_version = get_base_version(about.__version__) + spacy_version = get_minor_version(about.__version__) current_compat = compat.get(spacy_version, {}) if not current_compat: msg.warn(f"No compatible packages found for v{spacy_version} of spaCy") @@ -44,8 +46,8 @@ def validate() -> None: comp = msg.text("", color="green", icon="good", no_print=True) version = msg.text(data["version"], color="green", no_print=True) else: - version = msg.text(data["version"], color="red", no_print=True) - comp = f"--> {compat.get(data['name'], ['n/a'])[0]}" + version = msg.text(data["version"], color="yellow", no_print=True) + comp = f"--> {current_compat.get(data['name'], ['n/a'])[0]}" rows.append((data["name"], data["spacy"], version, comp)) msg.table(rows, header=header) else: @@ -78,7 +80,9 @@ def get_model_pkgs(silent: bool = False) -> Tuple[dict, dict]: msg.good("Loaded compatibility table") compat = r.json()["spacy"] all_models = set() - installed_models = get_installed_models() + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", message="\\[W09[45]") + installed_models = get_installed_models() for spacy_v, models in dict(compat).items(): all_models.update(models.keys()) for model, model_vs in models.items(): @@ -92,7 +96,9 @@ def get_model_pkgs(silent: bool = False) -> Tuple[dict, dict]: spacy_version = about.__version__ else: model_path = get_package_path(package) - model_meta = get_model_meta(model_path) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", message="\\[W09[45]") + model_meta = get_model_meta(model_path) spacy_version = model_meta.get("spacy_version", "n/a") is_compat = is_compatible_version(about.__version__, spacy_version) pkgs[pkg_name] = { diff --git a/spacy/errors.py b/spacy/errors.py index b5d11c1f7..56b6f8427 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -150,12 +150,12 @@ class Warnings: "released, because the model may say it's compatible when it's " 'not. Consider changing the "spacy_version" in your meta.json to a ' "version range, with a lower and upper pin. For example: {example}") - W095 = ("Model '{model}' ({model_version}) requires spaCy {version} and is " - "incompatible with the current version ({current}). This may lead " - "to unexpected results or runtime errors. To resolve this, " - "download a newer compatible model or retrain your custom model " - "with the current spaCy version. For more details and available " - "updates, run: python -m spacy validate") + W095 = ("Model '{model}' ({model_version}) was trained with spaCy " + "{version} and may not be 100% compatible with the current version " + "({current}). If you see errors or degraded performance, download " + "a newer compatible model or retrain your custom model with the " + "current spaCy version. For more details and available updates, " + "run: python -m spacy validate") W096 = ("The method `nlp.disable_pipes` is now deprecated - use " "`nlp.select_pipes` instead.") W100 = ("Skipping unsupported morphological feature(s): '{feature}'. " diff --git a/spacy/tests/test_cli.py b/spacy/tests/test_cli.py index 2013ceac4..11324aa63 100644 --- a/spacy/tests/test_cli.py +++ b/spacy/tests/test_cli.py @@ -10,6 +10,10 @@ from spacy.cli.init_config import init_config, RECOMMENDATIONS from spacy.cli._util import validate_project_commands, parse_config_overrides from spacy.cli._util import load_project_config, substitute_project_variables from spacy.cli._util import string_to_list +from spacy import about +from spacy.util import get_minor_version +from spacy.cli.validate import get_model_pkgs +from spacy.cli.download import get_compatibility, get_version from thinc.api import ConfigValidationError, Config import srsly import os @@ -308,7 +312,8 @@ def test_project_config_validation2(config, n_errors): @pytest.mark.parametrize( - "int_value", [10, pytest.param("10", marks=pytest.mark.xfail)], + "int_value", + [10, pytest.param("10", marks=pytest.mark.xfail)], ) def test_project_config_interpolation(int_value): variables = {"a": int_value, "b": {"c": "foo", "d": True}} @@ -331,7 +336,8 @@ def test_project_config_interpolation(int_value): @pytest.mark.parametrize( - "greeting", [342, "everyone", "tout le monde", pytest.param("42", marks=pytest.mark.xfail)], + "greeting", + [342, "everyone", "tout le monde", pytest.param("42", marks=pytest.mark.xfail)], ) def test_project_config_interpolation_override(greeting): variables = {"a": "world"} @@ -423,7 +429,13 @@ def test_parse_cli_overrides(): @pytest.mark.parametrize("pretraining", [True, False]) def test_init_config(lang, pipeline, optimize, pretraining): # TODO: add more tests and also check for GPU with transformers - config = init_config(lang=lang, pipeline=pipeline, optimize=optimize, pretraining=pretraining, gpu=False) + config = init_config( + lang=lang, + pipeline=pipeline, + optimize=optimize, + pretraining=pretraining, + gpu=False, + ) assert isinstance(config, Config) if pretraining: config["paths"]["raw_text"] = "my_data.jsonl" @@ -474,3 +486,18 @@ def test_string_to_list(value): def test_string_to_list_intify(value): assert string_to_list(value, intify=False) == ["1", "2", "3"] assert string_to_list(value, intify=True) == [1, 2, 3] + + +def test_download_compatibility(): + model_name = "en_core_web_sm" + compatibility = get_compatibility() + version = get_version(model_name, compatibility) + assert get_minor_version(about.__version__) == get_minor_version(version) + + +def test_validate_compatibility_table(): + model_pkgs, compat = get_model_pkgs() + spacy_version = get_minor_version(about.__version__) + current_compat = compat.get(spacy_version, {}) + assert len(current_compat) > 0 + assert "en_core_web_sm" in current_compat diff --git a/spacy/util.py b/spacy/util.py index 52b48dcdb..d9dd314a4 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -648,6 +648,19 @@ def get_model_version_range(spacy_version: str) -> str: return f">={spacy_version},<{release[0]}.{release[1] + 1}.0" +def get_model_lower_version(constraint: str) -> Optional[str]: + """From a version range like >=1.2.3,<1.3.0 return the lower pin. + """ + try: + specset = SpecifierSet(constraint) + for spec in specset: + if spec.operator in (">=", "==", "~="): + return spec.version + except Exception: + pass + return None + + def get_base_version(version: str) -> str: """Generate the base version without any prerelease identifiers. @@ -701,10 +714,18 @@ def load_meta(path: Union[str, Path]) -> Dict[str, Any]: raise ValueError(Errors.E054.format(setting=setting)) if "spacy_version" in meta: if not is_compatible_version(about.__version__, meta["spacy_version"]): + lower_version = get_model_lower_version(meta["spacy_version"]) + lower_version = get_minor_version(lower_version) + if lower_version is not None: + lower_version = "v" + lower_version + elif "spacy_git_version" in meta: + lower_version = "git commit " + meta["spacy_git_version"] + else: + lower_version = "version unknown" warn_msg = Warnings.W095.format( model=f"{meta['lang']}_{meta['name']}", model_version=meta["version"], - version=meta["spacy_version"], + version=lower_version, current=about.__version__, ) warnings.warn(warn_msg)