Merge pull request #5516 from explosion/feature/improve-model-version-deps

This commit is contained in:
Ines Montani 2020-05-31 12:54:01 +02:00 committed by GitHub
commit b5ae2edcba
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 87 additions and 43 deletions

View File

@ -13,8 +13,11 @@ numpy>=1.15.0
requests>=2.13.0,<3.0.0 requests>=2.13.0,<3.0.0
plac>=0.9.6,<1.2.0 plac>=0.9.6,<1.2.0
tqdm>=4.38.0,<5.0.0 tqdm>=4.38.0,<5.0.0
importlib_metadata>=0.20; python_version < "3.8"
pydantic>=1.3.0,<2.0.0 pydantic>=1.3.0,<2.0.0
# Official Python utilities
setuptools
packaging
importlib_metadata>=0.20; python_version < "3.8"
# Development dependencies # Development dependencies
cython>=0.25 cython>=0.25
pytest>=4.6.5 pytest>=4.6.5

View File

@ -50,11 +50,13 @@ install_requires =
ml_datasets>=0.1.1 ml_datasets>=0.1.1
# Third-party dependencies # Third-party dependencies
tqdm>=4.38.0,<5.0.0 tqdm>=4.38.0,<5.0.0
setuptools
numpy>=1.15.0 numpy>=1.15.0
plac>=0.9.6,<1.2.0 plac>=0.9.6,<1.2.0
requests>=2.13.0,<3.0.0 requests>=2.13.0,<3.0.0
pydantic>=1.3.0,<2.0.0 pydantic>=1.3.0,<2.0.0
# Official Python utilities
setuptools
packaging
importlib_metadata>=0.20; python_version < "3.8" importlib_metadata>=0.20; python_version < "3.8"
[options.extras_require] [options.extras_require]

View File

@ -5,7 +5,7 @@ import sys
from wasabi import msg from wasabi import msg
from .. import about from .. import about
from ..util import is_package from ..util import is_package, get_base_version
def download( def download(
@ -63,8 +63,7 @@ def get_json(url, desc):
def get_compatibility(): def get_compatibility():
version = about.__version__ version = get_base_version(about.__version__)
version = version.rsplit(".dev", 1)[0]
comp_table = get_json(about.__compatibility__, "compatibility table") comp_table = get_json(about.__compatibility__, "compatibility table")
comp = comp_table["spacy"] comp = comp_table["spacy"]
if version not in comp: if version not in comp:
@ -73,7 +72,7 @@ def get_compatibility():
def get_version(model, comp): def get_version(model, comp):
model = model.rsplit(".dev", 1)[0] model = get_base_version(model)
if model not in comp: if model not in comp:
msg.fail( msg.fail(
f"No compatible model found for '{model}' (spaCy v{about.__version__})", f"No compatible model found for '{model}' (spaCy v{about.__version__})",

View File

@ -90,7 +90,7 @@ def generate_meta(model_path, existing_meta, msg):
("license", "License", meta.get("license", "MIT")), ("license", "License", meta.get("license", "MIT")),
] ]
nlp = util.load_model_from_path(Path(model_path)) nlp = util.load_model_from_path(Path(model_path))
meta["spacy_version"] = about.__version__ meta["spacy_version"] = util.get_model_version_range(about.__version__)
meta["pipeline"] = nlp.pipe_names meta["pipeline"] = nlp.pipe_names
meta["vectors"] = { meta["vectors"] = {
"width": nlp.vocab.vectors_length, "width": nlp.vocab.vectors_length,

View File

@ -467,7 +467,6 @@ def train(
# Update model meta.json # Update model meta.json
meta["lang"] = nlp.lang meta["lang"] = nlp.lang
meta["pipeline"] = nlp.pipe_names meta["pipeline"] = nlp.pipe_names
meta["spacy_version"] = about.__version__
if beam_width == 1: if beam_width == 1:
meta["speed"] = { meta["speed"] = {
"nwords": nwords, "nwords": nwords,

View File

@ -4,8 +4,8 @@ import requests
from wasabi import msg from wasabi import msg
from .. import about from .. import about
from ..util import get_package_version, get_installed_models, split_version from ..util import get_package_version, get_installed_models, get_base_version
from ..util import get_package_path, get_model_meta, is_compatible_model from ..util import get_package_path, get_model_meta, is_compatible_version
def validate(): def validate():
@ -14,7 +14,7 @@ def validate():
with the installed models. Should be run after `pip install -U spacy`. with the installed models. Should be run after `pip install -U spacy`.
""" """
model_pkgs, compat = get_model_pkgs() model_pkgs, compat = get_model_pkgs()
spacy_version = about.__version__.rsplit(".dev", 1)[0] spacy_version = get_base_version(about.__version__)
current_compat = compat.get(spacy_version, {}) current_compat = compat.get(spacy_version, {})
if not current_compat: if not current_compat:
msg.warn(f"No compatible models found for v{spacy_version} of spaCy") msg.warn(f"No compatible models found for v{spacy_version} of spaCy")
@ -78,13 +78,12 @@ def get_model_pkgs():
version = get_package_version(pkg_name) version = get_package_version(pkg_name)
if package in compat: if package in compat:
is_compat = version in compat[package] is_compat = version in compat[package]
v_maj, v_min = split_version(about.__version__) spacy_version = about.__version__
spacy_version = f"{v_maj}.{v_min}"
else: else:
model_path = get_package_path(package) model_path = get_package_path(package)
model_meta = get_model_meta(model_path) model_meta = get_model_meta(model_path)
is_compat = is_compatible_model(model_meta)
spacy_version = model_meta.get("spacy_version", "n/a") spacy_version = model_meta.get("spacy_version", "n/a")
is_compat = is_compatible_version(about.__version__, spacy_version)
pkgs[pkg_name] = { pkgs[pkg_name] = {
"name": package, "name": package,
"version": version, "version": version,

View File

@ -104,6 +104,12 @@ class Warnings(object):
"string \"Field1=Value1,Value2|Field2=Value3\".") "string \"Field1=Value1,Value2|Field2=Value3\".")
# TODO: fix numbering after merging develop into master # TODO: fix numbering after merging develop into master
W095 = ("Model '{model}' ({model_version}) requires spaCy {version} and is "
"incompatible with the current version ({current}). This may lead "
"to unexpected results or runtime errors. To resolve this, "
"download a newer compatible model or retrain your custom model "
"with the current spaCy version. For more details and available "
"updates, run: python -m spacy validate")
W096 = ("The method 'disable_pipes' has become deprecated - use 'select_pipes' " W096 = ("The method 'disable_pipes' has become deprecated - use 'select_pipes' "
"instead.") "instead.")
W097 = ("No Model config was provided to create the '{name}' component, " W097 = ("No Model config was provided to create the '{name}' component, "

View File

@ -191,13 +191,14 @@ class Language(object):
@property @property
def meta(self): def meta(self):
spacy_version = util.get_model_version_range(about.__version__)
if self.vocab.lang: if self.vocab.lang:
self._meta.setdefault("lang", self.vocab.lang) self._meta.setdefault("lang", self.vocab.lang)
else: else:
self._meta.setdefault("lang", self.lang) self._meta.setdefault("lang", self.lang)
self._meta.setdefault("name", "model") self._meta.setdefault("name", "model")
self._meta.setdefault("version", "0.0.0") self._meta.setdefault("version", "0.0.0")
self._meta.setdefault("spacy_version", about.__version__) self._meta.setdefault("spacy_version", spacy_version)
self._meta.setdefault("description", "") self._meta.setdefault("description", "")
self._meta.setdefault("author", "") self._meta.setdefault("author", "")
self._meta.setdefault("email", "") self._meta.setdefault("email", "")

View File

@ -94,8 +94,18 @@ def test_ascii_filenames():
@pytest.mark.parametrize( @pytest.mark.parametrize(
"version,compatible", "version,constraint,compatible",
[(spacy_version, True), ("2.0.0", False), (">=1.2.3,<4.5.6", False)], [
(spacy_version, spacy_version, True),
(spacy_version, f">={spacy_version}", True),
("3.0.0", "2.0.0", False),
("3.2.1", ">=2.0.0", True),
("2.2.10a1", ">=1.0.0,<2.1.1", False),
("3.0.0.dev3", ">=1.2.3,<4.5.6", True),
("n/a", ">=1.2.3,<4.5.6", None),
("1.2.3", "n/a", None),
("n/a", "n/a", None),
],
) )
def test_is_compatible_model(version, compatible): def test_is_compatible_version(version, constraint, compatible):
assert util.is_compatible_model({"spacy_version": version}) is compatible assert util.is_compatible_version(version, constraint) is compatible

View File

@ -14,6 +14,8 @@ import srsly
import catalogue import catalogue
import sys import sys
import warnings import warnings
from packaging.specifiers import SpecifierSet, InvalidSpecifier
from packaging.version import Version, InvalidVersion
try: try:
@ -236,33 +238,46 @@ def get_package_version(name):
return None return None
def split_version(version): def is_compatible_version(version, constraint, prereleases=True):
"""RETURNS (tuple): Two integers, the major and minor spaCy version.""" """Check if a version (e.g. "2.0.0") is compatible given a version
pieces = version.split(".", 3) constraint (e.g. ">=1.9.0,<2.2.1"). If the constraint is a specific version,
return int(pieces[0]), int(pieces[1]) it's interpreted as =={version}.
version (str): The version to check.
def is_compatible_model(meta): constraint (str): The constraint string.
"""Check if a model is compatible with the current version of spaCy, based prereleases (bool): Whether to allow prereleases. If set to False,
on its meta.json. We compare the version of spaCy the model was created with prerelease versions will be considered incompatible.
with the current version. If the minor version is different, it's considered RETURNS (bool / None): Whether the version is compatible, or None if the
incompatible. version or constraint are invalid.
meta (dict): The model's meta.
RETURNS (bool / None): Whether the model is compatible with the current
spaCy or None if we don't have enough info.
""" """
cur_v = about.__version__ # Handle cases where exact version is provided as constraint
pkg_v = meta.get("spacy_version") if constraint[0].isdigit():
if not pkg_v or not isinstance(pkg_v, str): constraint = f"=={constraint}"
try:
spec = SpecifierSet(constraint)
version = Version(version)
except (InvalidSpecifier, InvalidVersion):
return None return None
# Handle spacy_version values like >=x,<y, just in case spec.prereleases = prereleases
pkg_v = re.sub(r"[^0-9.]", "", pkg_v.split(",")[0]) return version in spec
cur_major, cur_minor = split_version(cur_v)
pkg_major, pkg_minor = split_version(pkg_v)
if cur_major != pkg_major or cur_minor != pkg_minor: def get_model_version_range(spacy_version):
return False """Generate a version range like >=1.2.3,<1.3.0 based on a given spaCy
return True version. Models are always compatible across patch versions but not
across minor or major versions.
"""
release = Version(spacy_version).release
return f">={spacy_version},<{release[0]}.{release[1] + 1}.0"
def get_base_version(version):
"""Generate the base version without any prerelease identifiers.
version (str): The version, e.g. "3.0.0.dev1".
RETURNS (str): The base version, e.g. "3.0.0".
"""
return Version(version).base_version
def load_config(path, create_objects=False): def load_config(path, create_objects=False):
@ -315,6 +330,16 @@ def get_model_meta(path):
for setting in ["lang", "name", "version"]: for setting in ["lang", "name", "version"]:
if setting not in meta or not meta[setting]: if setting not in meta or not meta[setting]:
raise ValueError(Errors.E054.format(setting=setting)) raise ValueError(Errors.E054.format(setting=setting))
if "spacy_version" in meta:
if not is_compatible_version(about.__version__, meta["spacy_version"]):
warnings.warn(
Warnings.W095.format(
model=f"{meta['lang']}_{meta['name']}",
model_version=meta["version"],
version=meta["spacy_version"],
current=about.__version__,
)
)
return meta return meta