mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	Merge pull request #5516 from explosion/feature/improve-model-version-deps
This commit is contained in:
		
						commit
						b5ae2edcba
					
				| 
						 | 
					@ -13,8 +13,11 @@ numpy>=1.15.0
 | 
				
			||||||
requests>=2.13.0,<3.0.0
 | 
					requests>=2.13.0,<3.0.0
 | 
				
			||||||
plac>=0.9.6,<1.2.0
 | 
					plac>=0.9.6,<1.2.0
 | 
				
			||||||
tqdm>=4.38.0,<5.0.0
 | 
					tqdm>=4.38.0,<5.0.0
 | 
				
			||||||
importlib_metadata>=0.20; python_version < "3.8"
 | 
					 | 
				
			||||||
pydantic>=1.3.0,<2.0.0
 | 
					pydantic>=1.3.0,<2.0.0
 | 
				
			||||||
 | 
					# Official Python utilities
 | 
				
			||||||
 | 
					setuptools
 | 
				
			||||||
 | 
					packaging
 | 
				
			||||||
 | 
					importlib_metadata>=0.20; python_version < "3.8"
 | 
				
			||||||
# Development dependencies
 | 
					# Development dependencies
 | 
				
			||||||
cython>=0.25
 | 
					cython>=0.25
 | 
				
			||||||
pytest>=4.6.5
 | 
					pytest>=4.6.5
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -50,11 +50,13 @@ install_requires =
 | 
				
			||||||
    ml_datasets>=0.1.1
 | 
					    ml_datasets>=0.1.1
 | 
				
			||||||
    # Third-party dependencies
 | 
					    # Third-party dependencies
 | 
				
			||||||
    tqdm>=4.38.0,<5.0.0
 | 
					    tqdm>=4.38.0,<5.0.0
 | 
				
			||||||
    setuptools
 | 
					 | 
				
			||||||
    numpy>=1.15.0
 | 
					    numpy>=1.15.0
 | 
				
			||||||
    plac>=0.9.6,<1.2.0
 | 
					    plac>=0.9.6,<1.2.0
 | 
				
			||||||
    requests>=2.13.0,<3.0.0
 | 
					    requests>=2.13.0,<3.0.0
 | 
				
			||||||
    pydantic>=1.3.0,<2.0.0
 | 
					    pydantic>=1.3.0,<2.0.0
 | 
				
			||||||
 | 
					    # Official Python utilities
 | 
				
			||||||
 | 
					    setuptools
 | 
				
			||||||
 | 
					    packaging
 | 
				
			||||||
    importlib_metadata>=0.20; python_version < "3.8"
 | 
					    importlib_metadata>=0.20; python_version < "3.8"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[options.extras_require]
 | 
					[options.extras_require]
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -5,7 +5,7 @@ import sys
 | 
				
			||||||
from wasabi import msg
 | 
					from wasabi import msg
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from .. import about
 | 
					from .. import about
 | 
				
			||||||
from ..util import is_package
 | 
					from ..util import is_package, get_base_version
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def download(
 | 
					def download(
 | 
				
			||||||
| 
						 | 
					@ -63,8 +63,7 @@ def get_json(url, desc):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def get_compatibility():
 | 
					def get_compatibility():
 | 
				
			||||||
    version = about.__version__
 | 
					    version = get_base_version(about.__version__)
 | 
				
			||||||
    version = version.rsplit(".dev", 1)[0]
 | 
					 | 
				
			||||||
    comp_table = get_json(about.__compatibility__, "compatibility table")
 | 
					    comp_table = get_json(about.__compatibility__, "compatibility table")
 | 
				
			||||||
    comp = comp_table["spacy"]
 | 
					    comp = comp_table["spacy"]
 | 
				
			||||||
    if version not in comp:
 | 
					    if version not in comp:
 | 
				
			||||||
| 
						 | 
					@ -73,7 +72,7 @@ def get_compatibility():
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def get_version(model, comp):
 | 
					def get_version(model, comp):
 | 
				
			||||||
    model = model.rsplit(".dev", 1)[0]
 | 
					    model = get_base_version(model)
 | 
				
			||||||
    if model not in comp:
 | 
					    if model not in comp:
 | 
				
			||||||
        msg.fail(
 | 
					        msg.fail(
 | 
				
			||||||
            f"No compatible model found for '{model}' (spaCy v{about.__version__})",
 | 
					            f"No compatible model found for '{model}' (spaCy v{about.__version__})",
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -90,7 +90,7 @@ def generate_meta(model_path, existing_meta, msg):
 | 
				
			||||||
        ("license", "License", meta.get("license", "MIT")),
 | 
					        ("license", "License", meta.get("license", "MIT")),
 | 
				
			||||||
    ]
 | 
					    ]
 | 
				
			||||||
    nlp = util.load_model_from_path(Path(model_path))
 | 
					    nlp = util.load_model_from_path(Path(model_path))
 | 
				
			||||||
    meta["spacy_version"] = about.__version__
 | 
					    meta["spacy_version"] = util.get_model_version_range(about.__version__)
 | 
				
			||||||
    meta["pipeline"] = nlp.pipe_names
 | 
					    meta["pipeline"] = nlp.pipe_names
 | 
				
			||||||
    meta["vectors"] = {
 | 
					    meta["vectors"] = {
 | 
				
			||||||
        "width": nlp.vocab.vectors_length,
 | 
					        "width": nlp.vocab.vectors_length,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -467,7 +467,6 @@ def train(
 | 
				
			||||||
                    # Update model meta.json
 | 
					                    # Update model meta.json
 | 
				
			||||||
                    meta["lang"] = nlp.lang
 | 
					                    meta["lang"] = nlp.lang
 | 
				
			||||||
                    meta["pipeline"] = nlp.pipe_names
 | 
					                    meta["pipeline"] = nlp.pipe_names
 | 
				
			||||||
                    meta["spacy_version"] = about.__version__
 | 
					 | 
				
			||||||
                    if beam_width == 1:
 | 
					                    if beam_width == 1:
 | 
				
			||||||
                        meta["speed"] = {
 | 
					                        meta["speed"] = {
 | 
				
			||||||
                            "nwords": nwords,
 | 
					                            "nwords": nwords,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -4,8 +4,8 @@ import requests
 | 
				
			||||||
from wasabi import msg
 | 
					from wasabi import msg
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from .. import about
 | 
					from .. import about
 | 
				
			||||||
from ..util import get_package_version, get_installed_models, split_version
 | 
					from ..util import get_package_version, get_installed_models, get_base_version
 | 
				
			||||||
from ..util import get_package_path, get_model_meta, is_compatible_model
 | 
					from ..util import get_package_path, get_model_meta, is_compatible_version
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def validate():
 | 
					def validate():
 | 
				
			||||||
| 
						 | 
					@ -14,7 +14,7 @@ def validate():
 | 
				
			||||||
    with the installed models. Should be run after `pip install -U spacy`.
 | 
					    with the installed models. Should be run after `pip install -U spacy`.
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    model_pkgs, compat = get_model_pkgs()
 | 
					    model_pkgs, compat = get_model_pkgs()
 | 
				
			||||||
    spacy_version = about.__version__.rsplit(".dev", 1)[0]
 | 
					    spacy_version = get_base_version(about.__version__)
 | 
				
			||||||
    current_compat = compat.get(spacy_version, {})
 | 
					    current_compat = compat.get(spacy_version, {})
 | 
				
			||||||
    if not current_compat:
 | 
					    if not current_compat:
 | 
				
			||||||
        msg.warn(f"No compatible models found for v{spacy_version} of spaCy")
 | 
					        msg.warn(f"No compatible models found for v{spacy_version} of spaCy")
 | 
				
			||||||
| 
						 | 
					@ -78,13 +78,12 @@ def get_model_pkgs():
 | 
				
			||||||
        version = get_package_version(pkg_name)
 | 
					        version = get_package_version(pkg_name)
 | 
				
			||||||
        if package in compat:
 | 
					        if package in compat:
 | 
				
			||||||
            is_compat = version in compat[package]
 | 
					            is_compat = version in compat[package]
 | 
				
			||||||
            v_maj, v_min = split_version(about.__version__)
 | 
					            spacy_version = about.__version__
 | 
				
			||||||
            spacy_version = f"{v_maj}.{v_min}"
 | 
					 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            model_path = get_package_path(package)
 | 
					            model_path = get_package_path(package)
 | 
				
			||||||
            model_meta = get_model_meta(model_path)
 | 
					            model_meta = get_model_meta(model_path)
 | 
				
			||||||
            is_compat = is_compatible_model(model_meta)
 | 
					 | 
				
			||||||
            spacy_version = model_meta.get("spacy_version", "n/a")
 | 
					            spacy_version = model_meta.get("spacy_version", "n/a")
 | 
				
			||||||
 | 
					            is_compat = is_compatible_version(about.__version__, spacy_version)
 | 
				
			||||||
        pkgs[pkg_name] = {
 | 
					        pkgs[pkg_name] = {
 | 
				
			||||||
            "name": package,
 | 
					            "name": package,
 | 
				
			||||||
            "version": version,
 | 
					            "version": version,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -104,6 +104,12 @@ class Warnings(object):
 | 
				
			||||||
            "string \"Field1=Value1,Value2|Field2=Value3\".")
 | 
					            "string \"Field1=Value1,Value2|Field2=Value3\".")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # TODO: fix numbering after merging develop into master
 | 
					    # TODO: fix numbering after merging develop into master
 | 
				
			||||||
 | 
					    W095 = ("Model '{model}' ({model_version}) requires spaCy {version} and is "
 | 
				
			||||||
 | 
					            "incompatible with the current version ({current}). This may lead "
 | 
				
			||||||
 | 
					            "to unexpected results or runtime errors. To resolve this, "
 | 
				
			||||||
 | 
					            "download a newer compatible model or retrain your custom model "
 | 
				
			||||||
 | 
					            "with the current spaCy version. For more details and available "
 | 
				
			||||||
 | 
					            "updates, run: python -m spacy validate")
 | 
				
			||||||
    W096 = ("The method 'disable_pipes' has become deprecated - use 'select_pipes' "
 | 
					    W096 = ("The method 'disable_pipes' has become deprecated - use 'select_pipes' "
 | 
				
			||||||
            "instead.")
 | 
					            "instead.")
 | 
				
			||||||
    W097 = ("No Model config was provided to create the '{name}' component, "
 | 
					    W097 = ("No Model config was provided to create the '{name}' component, "
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -191,13 +191,14 @@ class Language(object):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @property
 | 
					    @property
 | 
				
			||||||
    def meta(self):
 | 
					    def meta(self):
 | 
				
			||||||
 | 
					        spacy_version = util.get_model_version_range(about.__version__)
 | 
				
			||||||
        if self.vocab.lang:
 | 
					        if self.vocab.lang:
 | 
				
			||||||
            self._meta.setdefault("lang", self.vocab.lang)
 | 
					            self._meta.setdefault("lang", self.vocab.lang)
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            self._meta.setdefault("lang", self.lang)
 | 
					            self._meta.setdefault("lang", self.lang)
 | 
				
			||||||
        self._meta.setdefault("name", "model")
 | 
					        self._meta.setdefault("name", "model")
 | 
				
			||||||
        self._meta.setdefault("version", "0.0.0")
 | 
					        self._meta.setdefault("version", "0.0.0")
 | 
				
			||||||
        self._meta.setdefault("spacy_version", about.__version__)
 | 
					        self._meta.setdefault("spacy_version", spacy_version)
 | 
				
			||||||
        self._meta.setdefault("description", "")
 | 
					        self._meta.setdefault("description", "")
 | 
				
			||||||
        self._meta.setdefault("author", "")
 | 
					        self._meta.setdefault("author", "")
 | 
				
			||||||
        self._meta.setdefault("email", "")
 | 
					        self._meta.setdefault("email", "")
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -94,8 +94,18 @@ def test_ascii_filenames():
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@pytest.mark.parametrize(
 | 
					@pytest.mark.parametrize(
 | 
				
			||||||
    "version,compatible",
 | 
					    "version,constraint,compatible",
 | 
				
			||||||
    [(spacy_version, True), ("2.0.0", False), (">=1.2.3,<4.5.6", False)],
 | 
					    [
 | 
				
			||||||
 | 
					        (spacy_version, spacy_version, True),
 | 
				
			||||||
 | 
					        (spacy_version, f">={spacy_version}", True),
 | 
				
			||||||
 | 
					        ("3.0.0", "2.0.0", False),
 | 
				
			||||||
 | 
					        ("3.2.1", ">=2.0.0", True),
 | 
				
			||||||
 | 
					        ("2.2.10a1", ">=1.0.0,<2.1.1", False),
 | 
				
			||||||
 | 
					        ("3.0.0.dev3", ">=1.2.3,<4.5.6", True),
 | 
				
			||||||
 | 
					        ("n/a", ">=1.2.3,<4.5.6", None),
 | 
				
			||||||
 | 
					        ("1.2.3", "n/a", None),
 | 
				
			||||||
 | 
					        ("n/a", "n/a", None),
 | 
				
			||||||
 | 
					    ],
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
def test_is_compatible_model(version, compatible):
 | 
					def test_is_compatible_version(version, constraint, compatible):
 | 
				
			||||||
    assert util.is_compatible_model({"spacy_version": version}) is compatible
 | 
					    assert util.is_compatible_version(version, constraint) is compatible
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -14,6 +14,8 @@ import srsly
 | 
				
			||||||
import catalogue
 | 
					import catalogue
 | 
				
			||||||
import sys
 | 
					import sys
 | 
				
			||||||
import warnings
 | 
					import warnings
 | 
				
			||||||
 | 
					from packaging.specifiers import SpecifierSet, InvalidSpecifier
 | 
				
			||||||
 | 
					from packaging.version import Version, InvalidVersion
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
try:
 | 
					try:
 | 
				
			||||||
| 
						 | 
					@ -236,33 +238,46 @@ def get_package_version(name):
 | 
				
			||||||
        return None
 | 
					        return None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def split_version(version):
 | 
					def is_compatible_version(version, constraint, prereleases=True):
 | 
				
			||||||
    """RETURNS (tuple): Two integers, the major and minor spaCy version."""
 | 
					    """Check if a version (e.g. "2.0.0") is compatible given a version
 | 
				
			||||||
    pieces = version.split(".", 3)
 | 
					    constraint (e.g. ">=1.9.0,<2.2.1"). If the constraint is a specific version,
 | 
				
			||||||
    return int(pieces[0]), int(pieces[1])
 | 
					    it's interpreted as =={version}.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    version (str): The version to check.
 | 
				
			||||||
def is_compatible_model(meta):
 | 
					    constraint (str): The constraint string.
 | 
				
			||||||
    """Check if a model is compatible with the current version of spaCy, based
 | 
					    prereleases (bool): Whether to allow prereleases. If set to False,
 | 
				
			||||||
    on its meta.json. We compare the version of spaCy the model was created with
 | 
					        prerelease versions will be considered incompatible.
 | 
				
			||||||
    with the current version. If the minor version is different, it's considered
 | 
					    RETURNS (bool / None): Whether the version is compatible, or None if the
 | 
				
			||||||
    incompatible.
 | 
					        version or constraint are invalid.
 | 
				
			||||||
 | 
					 | 
				
			||||||
    meta (dict): The model's meta.
 | 
					 | 
				
			||||||
    RETURNS (bool / None): Whether the model is compatible with the current
 | 
					 | 
				
			||||||
        spaCy or None if we don't have enough info.
 | 
					 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    cur_v = about.__version__
 | 
					    # Handle cases where exact version is provided as constraint
 | 
				
			||||||
    pkg_v = meta.get("spacy_version")
 | 
					    if constraint[0].isdigit():
 | 
				
			||||||
    if not pkg_v or not isinstance(pkg_v, str):
 | 
					        constraint = f"=={constraint}"
 | 
				
			||||||
 | 
					    try:
 | 
				
			||||||
 | 
					        spec = SpecifierSet(constraint)
 | 
				
			||||||
 | 
					        version = Version(version)
 | 
				
			||||||
 | 
					    except (InvalidSpecifier, InvalidVersion):
 | 
				
			||||||
        return None
 | 
					        return None
 | 
				
			||||||
    # Handle spacy_version values like >=x,<y, just in case
 | 
					    spec.prereleases = prereleases
 | 
				
			||||||
    pkg_v = re.sub(r"[^0-9.]", "", pkg_v.split(",")[0])
 | 
					    return version in spec
 | 
				
			||||||
    cur_major, cur_minor = split_version(cur_v)
 | 
					
 | 
				
			||||||
    pkg_major, pkg_minor = split_version(pkg_v)
 | 
					
 | 
				
			||||||
    if cur_major != pkg_major or cur_minor != pkg_minor:
 | 
					def get_model_version_range(spacy_version):
 | 
				
			||||||
        return False
 | 
					    """Generate a version range like >=1.2.3,<1.3.0 based on a given spaCy
 | 
				
			||||||
    return True
 | 
					    version. Models are always compatible across patch versions but not
 | 
				
			||||||
 | 
					    across minor or major versions.
 | 
				
			||||||
 | 
					    """
 | 
				
			||||||
 | 
					    release = Version(spacy_version).release
 | 
				
			||||||
 | 
					    return f">={spacy_version},<{release[0]}.{release[1] + 1}.0"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def get_base_version(version):
 | 
				
			||||||
 | 
					    """Generate the base version without any prerelease identifiers.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    version (str): The version, e.g. "3.0.0.dev1".
 | 
				
			||||||
 | 
					    RETURNS (str): The base version, e.g. "3.0.0".
 | 
				
			||||||
 | 
					    """
 | 
				
			||||||
 | 
					    return Version(version).base_version
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def load_config(path, create_objects=False):
 | 
					def load_config(path, create_objects=False):
 | 
				
			||||||
| 
						 | 
					@ -315,6 +330,16 @@ def get_model_meta(path):
 | 
				
			||||||
    for setting in ["lang", "name", "version"]:
 | 
					    for setting in ["lang", "name", "version"]:
 | 
				
			||||||
        if setting not in meta or not meta[setting]:
 | 
					        if setting not in meta or not meta[setting]:
 | 
				
			||||||
            raise ValueError(Errors.E054.format(setting=setting))
 | 
					            raise ValueError(Errors.E054.format(setting=setting))
 | 
				
			||||||
 | 
					    if "spacy_version" in meta:
 | 
				
			||||||
 | 
					        if not is_compatible_version(about.__version__, meta["spacy_version"]):
 | 
				
			||||||
 | 
					            warnings.warn(
 | 
				
			||||||
 | 
					                Warnings.W095.format(
 | 
				
			||||||
 | 
					                    model=f"{meta['lang']}_{meta['name']}",
 | 
				
			||||||
 | 
					                    model_version=meta["version"],
 | 
				
			||||||
 | 
					                    version=meta["spacy_version"],
 | 
				
			||||||
 | 
					                    current=about.__version__,
 | 
				
			||||||
 | 
					                )
 | 
				
			||||||
 | 
					            )
 | 
				
			||||||
    return meta
 | 
					    return meta
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user