mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-27 01:34:30 +03:00
Merge pull request #5516 from explosion/feature/improve-model-version-deps
This commit is contained in:
commit
b5ae2edcba
|
@ -13,8 +13,11 @@ numpy>=1.15.0
|
||||||
requests>=2.13.0,<3.0.0
|
requests>=2.13.0,<3.0.0
|
||||||
plac>=0.9.6,<1.2.0
|
plac>=0.9.6,<1.2.0
|
||||||
tqdm>=4.38.0,<5.0.0
|
tqdm>=4.38.0,<5.0.0
|
||||||
importlib_metadata>=0.20; python_version < "3.8"
|
|
||||||
pydantic>=1.3.0,<2.0.0
|
pydantic>=1.3.0,<2.0.0
|
||||||
|
# Official Python utilities
|
||||||
|
setuptools
|
||||||
|
packaging
|
||||||
|
importlib_metadata>=0.20; python_version < "3.8"
|
||||||
# Development dependencies
|
# Development dependencies
|
||||||
cython>=0.25
|
cython>=0.25
|
||||||
pytest>=4.6.5
|
pytest>=4.6.5
|
||||||
|
|
|
@ -50,11 +50,13 @@ install_requires =
|
||||||
ml_datasets>=0.1.1
|
ml_datasets>=0.1.1
|
||||||
# Third-party dependencies
|
# Third-party dependencies
|
||||||
tqdm>=4.38.0,<5.0.0
|
tqdm>=4.38.0,<5.0.0
|
||||||
setuptools
|
|
||||||
numpy>=1.15.0
|
numpy>=1.15.0
|
||||||
plac>=0.9.6,<1.2.0
|
plac>=0.9.6,<1.2.0
|
||||||
requests>=2.13.0,<3.0.0
|
requests>=2.13.0,<3.0.0
|
||||||
pydantic>=1.3.0,<2.0.0
|
pydantic>=1.3.0,<2.0.0
|
||||||
|
# Official Python utilities
|
||||||
|
setuptools
|
||||||
|
packaging
|
||||||
importlib_metadata>=0.20; python_version < "3.8"
|
importlib_metadata>=0.20; python_version < "3.8"
|
||||||
|
|
||||||
[options.extras_require]
|
[options.extras_require]
|
||||||
|
|
|
@ -5,7 +5,7 @@ import sys
|
||||||
from wasabi import msg
|
from wasabi import msg
|
||||||
|
|
||||||
from .. import about
|
from .. import about
|
||||||
from ..util import is_package
|
from ..util import is_package, get_base_version
|
||||||
|
|
||||||
|
|
||||||
def download(
|
def download(
|
||||||
|
@ -63,8 +63,7 @@ def get_json(url, desc):
|
||||||
|
|
||||||
|
|
||||||
def get_compatibility():
|
def get_compatibility():
|
||||||
version = about.__version__
|
version = get_base_version(about.__version__)
|
||||||
version = version.rsplit(".dev", 1)[0]
|
|
||||||
comp_table = get_json(about.__compatibility__, "compatibility table")
|
comp_table = get_json(about.__compatibility__, "compatibility table")
|
||||||
comp = comp_table["spacy"]
|
comp = comp_table["spacy"]
|
||||||
if version not in comp:
|
if version not in comp:
|
||||||
|
@ -73,7 +72,7 @@ def get_compatibility():
|
||||||
|
|
||||||
|
|
||||||
def get_version(model, comp):
|
def get_version(model, comp):
|
||||||
model = model.rsplit(".dev", 1)[0]
|
model = get_base_version(model)
|
||||||
if model not in comp:
|
if model not in comp:
|
||||||
msg.fail(
|
msg.fail(
|
||||||
f"No compatible model found for '{model}' (spaCy v{about.__version__})",
|
f"No compatible model found for '{model}' (spaCy v{about.__version__})",
|
||||||
|
|
|
@ -90,7 +90,7 @@ def generate_meta(model_path, existing_meta, msg):
|
||||||
("license", "License", meta.get("license", "MIT")),
|
("license", "License", meta.get("license", "MIT")),
|
||||||
]
|
]
|
||||||
nlp = util.load_model_from_path(Path(model_path))
|
nlp = util.load_model_from_path(Path(model_path))
|
||||||
meta["spacy_version"] = about.__version__
|
meta["spacy_version"] = util.get_model_version_range(about.__version__)
|
||||||
meta["pipeline"] = nlp.pipe_names
|
meta["pipeline"] = nlp.pipe_names
|
||||||
meta["vectors"] = {
|
meta["vectors"] = {
|
||||||
"width": nlp.vocab.vectors_length,
|
"width": nlp.vocab.vectors_length,
|
||||||
|
|
|
@ -467,7 +467,6 @@ def train(
|
||||||
# Update model meta.json
|
# Update model meta.json
|
||||||
meta["lang"] = nlp.lang
|
meta["lang"] = nlp.lang
|
||||||
meta["pipeline"] = nlp.pipe_names
|
meta["pipeline"] = nlp.pipe_names
|
||||||
meta["spacy_version"] = about.__version__
|
|
||||||
if beam_width == 1:
|
if beam_width == 1:
|
||||||
meta["speed"] = {
|
meta["speed"] = {
|
||||||
"nwords": nwords,
|
"nwords": nwords,
|
||||||
|
|
|
@ -4,8 +4,8 @@ import requests
|
||||||
from wasabi import msg
|
from wasabi import msg
|
||||||
|
|
||||||
from .. import about
|
from .. import about
|
||||||
from ..util import get_package_version, get_installed_models, split_version
|
from ..util import get_package_version, get_installed_models, get_base_version
|
||||||
from ..util import get_package_path, get_model_meta, is_compatible_model
|
from ..util import get_package_path, get_model_meta, is_compatible_version
|
||||||
|
|
||||||
|
|
||||||
def validate():
|
def validate():
|
||||||
|
@ -14,7 +14,7 @@ def validate():
|
||||||
with the installed models. Should be run after `pip install -U spacy`.
|
with the installed models. Should be run after `pip install -U spacy`.
|
||||||
"""
|
"""
|
||||||
model_pkgs, compat = get_model_pkgs()
|
model_pkgs, compat = get_model_pkgs()
|
||||||
spacy_version = about.__version__.rsplit(".dev", 1)[0]
|
spacy_version = get_base_version(about.__version__)
|
||||||
current_compat = compat.get(spacy_version, {})
|
current_compat = compat.get(spacy_version, {})
|
||||||
if not current_compat:
|
if not current_compat:
|
||||||
msg.warn(f"No compatible models found for v{spacy_version} of spaCy")
|
msg.warn(f"No compatible models found for v{spacy_version} of spaCy")
|
||||||
|
@ -78,13 +78,12 @@ def get_model_pkgs():
|
||||||
version = get_package_version(pkg_name)
|
version = get_package_version(pkg_name)
|
||||||
if package in compat:
|
if package in compat:
|
||||||
is_compat = version in compat[package]
|
is_compat = version in compat[package]
|
||||||
v_maj, v_min = split_version(about.__version__)
|
spacy_version = about.__version__
|
||||||
spacy_version = f"{v_maj}.{v_min}"
|
|
||||||
else:
|
else:
|
||||||
model_path = get_package_path(package)
|
model_path = get_package_path(package)
|
||||||
model_meta = get_model_meta(model_path)
|
model_meta = get_model_meta(model_path)
|
||||||
is_compat = is_compatible_model(model_meta)
|
|
||||||
spacy_version = model_meta.get("spacy_version", "n/a")
|
spacy_version = model_meta.get("spacy_version", "n/a")
|
||||||
|
is_compat = is_compatible_version(about.__version__, spacy_version)
|
||||||
pkgs[pkg_name] = {
|
pkgs[pkg_name] = {
|
||||||
"name": package,
|
"name": package,
|
||||||
"version": version,
|
"version": version,
|
||||||
|
|
|
@ -104,6 +104,12 @@ class Warnings(object):
|
||||||
"string \"Field1=Value1,Value2|Field2=Value3\".")
|
"string \"Field1=Value1,Value2|Field2=Value3\".")
|
||||||
|
|
||||||
# TODO: fix numbering after merging develop into master
|
# TODO: fix numbering after merging develop into master
|
||||||
|
W095 = ("Model '{model}' ({model_version}) requires spaCy {version} and is "
|
||||||
|
"incompatible with the current version ({current}). This may lead "
|
||||||
|
"to unexpected results or runtime errors. To resolve this, "
|
||||||
|
"download a newer compatible model or retrain your custom model "
|
||||||
|
"with the current spaCy version. For more details and available "
|
||||||
|
"updates, run: python -m spacy validate")
|
||||||
W096 = ("The method 'disable_pipes' has become deprecated - use 'select_pipes' "
|
W096 = ("The method 'disable_pipes' has become deprecated - use 'select_pipes' "
|
||||||
"instead.")
|
"instead.")
|
||||||
W097 = ("No Model config was provided to create the '{name}' component, "
|
W097 = ("No Model config was provided to create the '{name}' component, "
|
||||||
|
|
|
@ -191,13 +191,14 @@ class Language(object):
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def meta(self):
|
def meta(self):
|
||||||
|
spacy_version = util.get_model_version_range(about.__version__)
|
||||||
if self.vocab.lang:
|
if self.vocab.lang:
|
||||||
self._meta.setdefault("lang", self.vocab.lang)
|
self._meta.setdefault("lang", self.vocab.lang)
|
||||||
else:
|
else:
|
||||||
self._meta.setdefault("lang", self.lang)
|
self._meta.setdefault("lang", self.lang)
|
||||||
self._meta.setdefault("name", "model")
|
self._meta.setdefault("name", "model")
|
||||||
self._meta.setdefault("version", "0.0.0")
|
self._meta.setdefault("version", "0.0.0")
|
||||||
self._meta.setdefault("spacy_version", about.__version__)
|
self._meta.setdefault("spacy_version", spacy_version)
|
||||||
self._meta.setdefault("description", "")
|
self._meta.setdefault("description", "")
|
||||||
self._meta.setdefault("author", "")
|
self._meta.setdefault("author", "")
|
||||||
self._meta.setdefault("email", "")
|
self._meta.setdefault("email", "")
|
||||||
|
|
|
@ -94,8 +94,18 @@ def test_ascii_filenames():
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
"version,compatible",
|
"version,constraint,compatible",
|
||||||
[(spacy_version, True), ("2.0.0", False), (">=1.2.3,<4.5.6", False)],
|
[
|
||||||
|
(spacy_version, spacy_version, True),
|
||||||
|
(spacy_version, f">={spacy_version}", True),
|
||||||
|
("3.0.0", "2.0.0", False),
|
||||||
|
("3.2.1", ">=2.0.0", True),
|
||||||
|
("2.2.10a1", ">=1.0.0,<2.1.1", False),
|
||||||
|
("3.0.0.dev3", ">=1.2.3,<4.5.6", True),
|
||||||
|
("n/a", ">=1.2.3,<4.5.6", None),
|
||||||
|
("1.2.3", "n/a", None),
|
||||||
|
("n/a", "n/a", None),
|
||||||
|
],
|
||||||
)
|
)
|
||||||
def test_is_compatible_model(version, compatible):
|
def test_is_compatible_version(version, constraint, compatible):
|
||||||
assert util.is_compatible_model({"spacy_version": version}) is compatible
|
assert util.is_compatible_version(version, constraint) is compatible
|
||||||
|
|
|
@ -14,6 +14,8 @@ import srsly
|
||||||
import catalogue
|
import catalogue
|
||||||
import sys
|
import sys
|
||||||
import warnings
|
import warnings
|
||||||
|
from packaging.specifiers import SpecifierSet, InvalidSpecifier
|
||||||
|
from packaging.version import Version, InvalidVersion
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
@ -236,33 +238,46 @@ def get_package_version(name):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def split_version(version):
|
def is_compatible_version(version, constraint, prereleases=True):
|
||||||
"""RETURNS (tuple): Two integers, the major and minor spaCy version."""
|
"""Check if a version (e.g. "2.0.0") is compatible given a version
|
||||||
pieces = version.split(".", 3)
|
constraint (e.g. ">=1.9.0,<2.2.1"). If the constraint is a specific version,
|
||||||
return int(pieces[0]), int(pieces[1])
|
it's interpreted as =={version}.
|
||||||
|
|
||||||
|
version (str): The version to check.
|
||||||
def is_compatible_model(meta):
|
constraint (str): The constraint string.
|
||||||
"""Check if a model is compatible with the current version of spaCy, based
|
prereleases (bool): Whether to allow prereleases. If set to False,
|
||||||
on its meta.json. We compare the version of spaCy the model was created with
|
prerelease versions will be considered incompatible.
|
||||||
with the current version. If the minor version is different, it's considered
|
RETURNS (bool / None): Whether the version is compatible, or None if the
|
||||||
incompatible.
|
version or constraint are invalid.
|
||||||
|
|
||||||
meta (dict): The model's meta.
|
|
||||||
RETURNS (bool / None): Whether the model is compatible with the current
|
|
||||||
spaCy or None if we don't have enough info.
|
|
||||||
"""
|
"""
|
||||||
cur_v = about.__version__
|
# Handle cases where exact version is provided as constraint
|
||||||
pkg_v = meta.get("spacy_version")
|
if constraint[0].isdigit():
|
||||||
if not pkg_v or not isinstance(pkg_v, str):
|
constraint = f"=={constraint}"
|
||||||
|
try:
|
||||||
|
spec = SpecifierSet(constraint)
|
||||||
|
version = Version(version)
|
||||||
|
except (InvalidSpecifier, InvalidVersion):
|
||||||
return None
|
return None
|
||||||
# Handle spacy_version values like >=x,<y, just in case
|
spec.prereleases = prereleases
|
||||||
pkg_v = re.sub(r"[^0-9.]", "", pkg_v.split(",")[0])
|
return version in spec
|
||||||
cur_major, cur_minor = split_version(cur_v)
|
|
||||||
pkg_major, pkg_minor = split_version(pkg_v)
|
|
||||||
if cur_major != pkg_major or cur_minor != pkg_minor:
|
def get_model_version_range(spacy_version):
|
||||||
return False
|
"""Generate a version range like >=1.2.3,<1.3.0 based on a given spaCy
|
||||||
return True
|
version. Models are always compatible across patch versions but not
|
||||||
|
across minor or major versions.
|
||||||
|
"""
|
||||||
|
release = Version(spacy_version).release
|
||||||
|
return f">={spacy_version},<{release[0]}.{release[1] + 1}.0"
|
||||||
|
|
||||||
|
|
||||||
|
def get_base_version(version):
|
||||||
|
"""Generate the base version without any prerelease identifiers.
|
||||||
|
|
||||||
|
version (str): The version, e.g. "3.0.0.dev1".
|
||||||
|
RETURNS (str): The base version, e.g. "3.0.0".
|
||||||
|
"""
|
||||||
|
return Version(version).base_version
|
||||||
|
|
||||||
|
|
||||||
def load_config(path, create_objects=False):
|
def load_config(path, create_objects=False):
|
||||||
|
@ -315,6 +330,16 @@ def get_model_meta(path):
|
||||||
for setting in ["lang", "name", "version"]:
|
for setting in ["lang", "name", "version"]:
|
||||||
if setting not in meta or not meta[setting]:
|
if setting not in meta or not meta[setting]:
|
||||||
raise ValueError(Errors.E054.format(setting=setting))
|
raise ValueError(Errors.E054.format(setting=setting))
|
||||||
|
if "spacy_version" in meta:
|
||||||
|
if not is_compatible_version(about.__version__, meta["spacy_version"]):
|
||||||
|
warnings.warn(
|
||||||
|
Warnings.W095.format(
|
||||||
|
model=f"{meta['lang']}_{meta['name']}",
|
||||||
|
model_version=meta["version"],
|
||||||
|
version=meta["spacy_version"],
|
||||||
|
current=about.__version__,
|
||||||
|
)
|
||||||
|
)
|
||||||
return meta
|
return meta
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user