diff --git a/requirements.txt b/requirements.txt index e5f1ae10b..c43ffa7bb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,6 +13,7 @@ numpy>=1.15.0 requests>=2.13.0,<3.0.0 plac>=0.9.6,<1.2.0 tqdm>=4.38.0,<5.0.0 +importlib_metadata>=0.20; python_version < "3.8" # Optional dependencies jsonschema>=2.6.0,<3.1.0 pydantic>=1.3.0,<2.0.0 diff --git a/setup.cfg b/setup.cfg index 1cd088279..eb7608c4e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -56,6 +56,7 @@ install_requires = requests>=2.13.0,<3.0.0 pydantic>=1.3.0,<2.0.0 tqdm>=4.38.0,<5.0.0 + importlib_metadata>=0.20; python_version < "3.8" [options.extras_require] lookups = diff --git a/spacy/cli/info.py b/spacy/cli/info.py index 23f766368..d779eb2b3 100644 --- a/spacy/cli/info.py +++ b/spacy/cli/info.py @@ -48,7 +48,9 @@ def info( "Location": str(Path(__file__).parent.parent), "Platform": platform.platform(), "Python version": platform.python_version(), - "Models": ", ".join(model["name"] for model in all_models.values()), + "Models": ", ".join( + f"{m['name']} ({m['version']})" for m in all_models.values() + ), } if not silent: title = "Info about spaCy" diff --git a/spacy/cli/package.py b/spacy/cli/package.py index 8e27e44d0..cf93c872f 100644 --- a/spacy/cli/package.py +++ b/spacy/cli/package.py @@ -83,14 +83,14 @@ def generate_meta(model_path, existing_meta, msg): ("lang", "Model language", meta.get("lang", "en")), ("name", "Model name", meta.get("name", "model")), ("version", "Model version", meta.get("version", "0.0.0")), - ("spacy_version", "Required spaCy version", f">={about.__version__},<3.0.0"), ("description", "Model description", meta.get("description", False)), ("author", "Author", meta.get("author", False)), ("email", "Author email", meta.get("email", False)), ("url", "Author website", meta.get("url", False)), - ("license", "License", meta.get("license", "CC BY-SA 3.0")), + ("license", "License", meta.get("license", "MIT")), ] nlp = util.load_model_from_path(Path(model_path)) + meta["spacy_version"] = about.__version__ meta["pipeline"] = nlp.pipe_names meta["vectors"] = { "width": nlp.vocab.vectors_length, @@ -168,6 +168,7 @@ def setup_package(): package_data={model_name: list_files(model_dir)}, install_requires=list_requirements(meta), zip_safe=False, + entry_points={'spacy_models': ['{m} = {m}'.format(m=model_name)]} ) diff --git a/spacy/cli/train.py b/spacy/cli/train.py index 19e0a81e0..c205fa5b2 100644 --- a/spacy/cli/train.py +++ b/spacy/cli/train.py @@ -467,7 +467,7 @@ def train( # Update model meta.json meta["lang"] = nlp.lang meta["pipeline"] = nlp.pipe_names - meta["spacy_version"] = f">={about.__version__}" + meta["spacy_version"] = about.__version__ if beam_width == 1: meta["speed"] = { "nwords": nwords, diff --git a/spacy/cli/validate.py b/spacy/cli/validate.py index a23ce3453..c39cadc7b 100644 --- a/spacy/cli/validate.py +++ b/spacy/cli/validate.py @@ -4,6 +4,8 @@ import requests from wasabi import msg from .. import about +from ..util import get_package_version, get_installed_models, split_version +from ..util import get_package_path, get_model_meta, is_compatible_model def validate(): @@ -25,7 +27,7 @@ def validate(): msg.info(f"spaCy installation: {spacy_dir}") if model_pkgs: - header = ("NAME", "VERSION", "") + header = ("NAME", "SPACY", "VERSION", "") rows = [] for name, data in model_pkgs.items(): if data["compat"]: @@ -34,7 +36,7 @@ def validate(): else: version = msg.text(data["version"], color="red", no_print=True) comp = f"--> {compat.get(data['name'], ['n/a'])[0]}" - rows.append((data["name"], version, comp)) + rows.append((data["name"], data["spacy"], version, comp)) msg.table(rows, header=header) else: msg.text("No models found in your current environment.", exits=0) @@ -44,8 +46,9 @@ def validate(): cmd = "python -m spacy download {}" print("\n".join([cmd.format(pkg) for pkg in update_models]) + "\n") if na_models: - msg.warn( - f"The following models are not available for spaCy v{about.__version__}:", + msg.info( + f"The following models are custom spaCy models or not " + f"available for spaCy v{about.__version__}:", ", ".join(na_models), ) if incompat_models: @@ -53,8 +56,6 @@ def validate(): def get_model_pkgs(): - import pkg_resources - with msg.loading("Loading compatibility table..."): r = requests.get(about.__compatibility__) if r.status_code != 200: @@ -66,20 +67,30 @@ def get_model_pkgs(): msg.good("Loaded compatibility table") compat = r.json()["spacy"] all_models = set() + installed_models = get_installed_models() for spacy_v, models in dict(compat).items(): all_models.update(models.keys()) for model, model_vs in models.items(): compat[spacy_v][model] = [reformat_version(v) for v in model_vs] pkgs = {} - for pkg_name, pkg_data in pkg_resources.working_set.by_key.items(): + for pkg_name in installed_models: package = pkg_name.replace("-", "_") - if package in all_models: - version = pkg_data.version - pkgs[pkg_name] = { - "name": package, - "version": version, - "compat": package in compat and version in compat[package], - } + version = get_package_version(pkg_name) + if package in compat: + is_compat = version in compat[package] + v_maj, v_min = split_version(about.__version__) + spacy_version = f"{v_maj}.{v_min}" + else: + model_path = get_package_path(package) + model_meta = get_model_meta(model_path) + is_compat = is_compatible_model(model_meta) + spacy_version = model_meta.get("spacy_version", "n/a") + pkgs[pkg_name] = { + "name": package, + "version": version, + "spacy": spacy_version, + "compat": is_compat, + } return pkgs, compat diff --git a/spacy/language.py b/spacy/language.py index d71c27406..f770cda2c 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -196,7 +196,7 @@ class Language(object): self._meta.setdefault("lang", self.lang) self._meta.setdefault("name", "model") self._meta.setdefault("version", "0.0.0") - self._meta.setdefault("spacy_version", f">={about.__version__}") + self._meta.setdefault("spacy_version", about.__version__) self._meta.setdefault("description", "") self._meta.setdefault("author", "") self._meta.setdefault("email", "") diff --git a/spacy/util.py b/spacy/util.py index 7f35c2f7c..5a7c633fa 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -21,9 +21,16 @@ try: except ImportError: cupy = None +try: # Python 3.8 + import importlib.metadata as importlib_metadata +except ImportError: + import importlib_metadata + from .symbols import ORTH from .compat import cupy, CudaStream from .errors import Errors, Warnings +from . import about + _PRINT_ENV = False @@ -35,6 +42,10 @@ class registry(thinc.registry): factories = catalogue.create("spacy", "factories", entry_points=True) displacy_colors = catalogue.create("spacy", "displacy_colors", entry_points=True) assets = catalogue.create("spacy", "assets", entry_points=True) + # This is mostly used to get a list of all installed models in the current + # environment. spaCy models packaged with `spacy package` will "advertise" + # themselves via entry points. + models = catalogue.create("spacy", "models", entry_points=True) def set_env_log(value): @@ -204,6 +215,56 @@ def load_model_from_init_py(init_file, **overrides): return load_model_from_path(data_path, meta, **overrides) +def get_installed_models(): + """List all model packages currently installed in the environment. + + RETURNS (list): The string names of the models. + """ + return list(registry.models.get_all().keys()) + + +def get_package_version(name): + """Get the version of an installed package. Typically used to get model + package versions. + + name (unicode): The name of the installed Python package. + RETURNS (unicode / None): The version or None if package not installed. + """ + try: + return importlib_metadata.version(name) + except importlib_metadata.PackageNotFoundError: + return None + + +def split_version(version): + """RETURNS (tuple): Two integers, the major and minor spaCy version.""" + pieces = version.split(".", 3) + return int(pieces[0]), int(pieces[1]) + + +def is_compatible_model(meta): + """Check if a model is compatible with the current version of spaCy, based + on its meta.json. We compare the version of spaCy the model was created with + with the current version. If the minor version is different, it's considered + incompatible. + + meta (dict): The model's meta. + RETURNS (bool / None): Whether the model is compatible with the current + spaCy or None if we don't have enough info. + """ + cur_v = about.__version__ + pkg_v = meta.get("spacy_version") + if not pkg_v or not isinstance(pkg_v, str): + return None + # Handle spacy_version values like >=x,