mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 02:06:31 +03:00
Auto-detect package dependencies in spacy package (#8948)
* Auto-detect package dependencies in spacy package
* Add simple get_third_party_dependencies test
* Import packages_distributions explicitly
* Inline packages_distributions
* Fix docstring [ci skip]
* Relax catalogue requirement
* Move importlib_metadata to spacy.compat with note
* Include license information [ci skip]
This commit is contained in:
parent
0a6b68848f
commit
d94ddd5686
|
@ -104,3 +104,26 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
SOFTWARE.
|
SOFTWARE.
|
||||||
|
|
||||||
|
|
||||||
|
importlib_metadata
|
||||||
|
------------------
|
||||||
|
|
||||||
|
* Files: util.py
|
||||||
|
|
||||||
|
The implementation of packages_distributions() is adapted from
|
||||||
|
importlib_metadata, which is distributed under the following license:
|
||||||
|
|
||||||
|
Copyright 2017-2019 Jason R. Coombs, Barry Warsaw
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
|
|
@ -2,6 +2,8 @@ from typing import Optional, Union, Any, Dict, List, Tuple
|
||||||
import shutil
|
import shutil
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from wasabi import Printer, MarkdownRenderer, get_raw_input
|
from wasabi import Printer, MarkdownRenderer, get_raw_input
|
||||||
|
from thinc.api import Config
|
||||||
|
from collections import defaultdict
|
||||||
import srsly
|
import srsly
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
@ -99,6 +101,12 @@ def package(
|
||||||
msg.fail("Can't load pipeline meta.json", meta_path, exits=1)
|
msg.fail("Can't load pipeline meta.json", meta_path, exits=1)
|
||||||
meta = srsly.read_json(meta_path)
|
meta = srsly.read_json(meta_path)
|
||||||
meta = get_meta(input_dir, meta)
|
meta = get_meta(input_dir, meta)
|
||||||
|
if meta["requirements"]:
|
||||||
|
msg.good(
|
||||||
|
f"Including {len(meta['requirements'])} package requirement(s) from "
|
||||||
|
f"meta and config",
|
||||||
|
", ".join(meta["requirements"]),
|
||||||
|
)
|
||||||
if name is not None:
|
if name is not None:
|
||||||
meta["name"] = name
|
meta["name"] = name
|
||||||
if version is not None:
|
if version is not None:
|
||||||
|
@ -175,6 +183,51 @@ def has_wheel() -> bool:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def get_third_party_dependencies(
    config: Config, exclude: List[str] = util.SimpleFrozenList()
) -> List[str]:
    """If the config includes references to registered functions that are
    provided by third-party packages (spacy-transformers, other libraries), we
    want to include them in meta["requirements"] so that the package specifies
    them as dependencies and the user won't have to do it manually.

    We do this by:
    - traversing the config to check for registered functions (@ keys)
    - looking up the functions and getting their module
    - looking up the module version and generating an appropriate version range

    config (Config): The pipeline config.
    exclude (list): List of packages to exclude (e.g. that already exist in meta).
    RETURNS (list): The versioned requirements, at most one per distribution,
        ordered by the top-level module name they were detected from.
    """
    own_packages = ("spacy", "spacy-nightly", "thinc", "srsly")
    distributions = util.packages_distributions()
    funcs = defaultdict(set)
    for path, value in util.walk_dict(config):
        if path[-1].startswith("@"):  # collect all function references by registry
            funcs[path[-1][1:]].add(value)
    modules = set()
    for reg_name, func_names in funcs.items():
        sub_registry = getattr(util.registry, reg_name)
        for func_name in func_names:
            func_info = sub_registry.find(func_name)
            module_name = func_info.get("module")
            if module_name:  # the code is part of a module, not a --code file
                modules.add(module_name.split(".")[0])
    dependencies = []
    seen_packages = set()
    # Iterate in sorted order: set iteration order for strings varies between
    # interpreter runs, which would make the generated requirements unstable.
    for module_name in sorted(modules):
        dist = distributions.get(module_name)
        if not dist:  # module isn't provided by an installed distribution
            continue
        pkg = dist[0]
        if pkg in own_packages or pkg in exclude:
            continue
        # Several top-level modules can belong to the same distribution; only
        # emit one requirement per distribution.
        if pkg in seen_packages:
            continue
        seen_packages.add(pkg)
        version = util.get_package_version(pkg)
        version_range = util.get_minor_version_range(version)
        dependencies.append(f"{pkg}{version_range}")
    return dependencies
|
||||||
|
|
||||||
|
|
||||||
def get_build_formats(formats: List[str]) -> Tuple[bool, bool]:
|
def get_build_formats(formats: List[str]) -> Tuple[bool, bool]:
|
||||||
supported = ["sdist", "wheel", "none"]
|
supported = ["sdist", "wheel", "none"]
|
||||||
for form in formats:
|
for form in formats:
|
||||||
|
@ -208,7 +261,7 @@ def get_meta(
|
||||||
nlp = util.load_model_from_path(Path(model_path))
|
nlp = util.load_model_from_path(Path(model_path))
|
||||||
meta.update(nlp.meta)
|
meta.update(nlp.meta)
|
||||||
meta.update(existing_meta)
|
meta.update(existing_meta)
|
||||||
meta["spacy_version"] = util.get_model_version_range(about.__version__)
|
meta["spacy_version"] = util.get_minor_version_range(about.__version__)
|
||||||
meta["vectors"] = {
|
meta["vectors"] = {
|
||||||
"width": nlp.vocab.vectors_length,
|
"width": nlp.vocab.vectors_length,
|
||||||
"vectors": len(nlp.vocab.vectors),
|
"vectors": len(nlp.vocab.vectors),
|
||||||
|
@ -217,6 +270,11 @@ def get_meta(
|
||||||
}
|
}
|
||||||
if about.__title__ != "spacy":
|
if about.__title__ != "spacy":
|
||||||
meta["parent_package"] = about.__title__
|
meta["parent_package"] = about.__title__
|
||||||
|
meta.setdefault("requirements", [])
|
||||||
|
# Update the requirements with all third-party packages in the config
|
||||||
|
existing_reqs = [util.split_requirement(req)[0] for req in meta["requirements"]]
|
||||||
|
reqs = get_third_party_dependencies(nlp.config, exclude=existing_reqs)
|
||||||
|
meta["requirements"].extend(reqs)
|
||||||
return meta
|
return meta
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -27,6 +27,14 @@ try: # Python 3.8+
|
||||||
except ImportError:
|
except ImportError:
|
||||||
from typing_extensions import Literal # noqa: F401
|
from typing_extensions import Literal # noqa: F401
|
||||||
|
|
||||||
|
# Important note: The importlib_metadata "backport" includes functionality
|
||||||
|
# that's not part of the built-in importlib.metadata. We should treat this
|
||||||
|
# import like the built-in and only use what's available there.
|
||||||
|
try: # Python 3.8+
|
||||||
|
import importlib.metadata as importlib_metadata
|
||||||
|
except ImportError:
|
||||||
|
from catalogue import _importlib_metadata as importlib_metadata # noqa: F401
|
||||||
|
|
||||||
from thinc.api import Optimizer # noqa: F401
|
from thinc.api import Optimizer # noqa: F401
|
||||||
|
|
||||||
pickle = pickle
|
pickle = pickle
|
||||||
|
|
|
@ -199,7 +199,7 @@ class Language:
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/language#meta
|
DOCS: https://spacy.io/api/language#meta
|
||||||
"""
|
"""
|
||||||
spacy_version = util.get_model_version_range(about.__version__)
|
spacy_version = util.get_minor_version_range(about.__version__)
|
||||||
if self.vocab.lang:
|
if self.vocab.lang:
|
||||||
self._meta.setdefault("lang", self.vocab.lang)
|
self._meta.setdefault("lang", self.vocab.lang)
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -14,6 +14,7 @@ from spacy import about
|
||||||
from spacy.util import get_minor_version
|
from spacy.util import get_minor_version
|
||||||
from spacy.cli.validate import get_model_pkgs
|
from spacy.cli.validate import get_model_pkgs
|
||||||
from spacy.cli.download import get_compatibility, get_version
|
from spacy.cli.download import get_compatibility, get_version
|
||||||
|
from spacy.cli.package import get_third_party_dependencies
|
||||||
from thinc.api import ConfigValidationError, Config
|
from thinc.api import ConfigValidationError, Config
|
||||||
import srsly
|
import srsly
|
||||||
import os
|
import os
|
||||||
|
@ -532,3 +533,10 @@ def test_init_labels(component_name):
|
||||||
assert len(nlp2.get_pipe(component_name).labels) == 0
|
assert len(nlp2.get_pipe(component_name).labels) == 0
|
||||||
nlp2.initialize()
|
nlp2.initialize()
|
||||||
assert len(nlp2.get_pipe(component_name).labels) == 4
|
assert len(nlp2.get_pipe(component_name).labels) == 4
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_third_party_dependencies_runs():
    # Smoke test only: detecting real third-party packages is hard to set up
    # here, so we just check that the function (and the importlib-based lookup
    # behind it) runs and reports nothing for a blank pipeline.
    pipeline = Dutch()
    detected = get_third_party_dependencies(pipeline.config)
    assert detected == []
|
||||||
|
|
|
@ -20,8 +20,10 @@ import sys
|
||||||
import warnings
|
import warnings
|
||||||
from packaging.specifiers import SpecifierSet, InvalidSpecifier
|
from packaging.specifiers import SpecifierSet, InvalidSpecifier
|
||||||
from packaging.version import Version, InvalidVersion
|
from packaging.version import Version, InvalidVersion
|
||||||
|
from packaging.requirements import Requirement
|
||||||
import subprocess
|
import subprocess
|
||||||
from contextlib import contextmanager
|
from contextlib import contextmanager
|
||||||
|
from collections import defaultdict
|
||||||
import tempfile
|
import tempfile
|
||||||
import shutil
|
import shutil
|
||||||
import shlex
|
import shlex
|
||||||
|
@ -33,11 +35,6 @@ try:
|
||||||
except ImportError:
|
except ImportError:
|
||||||
cupy = None
|
cupy = None
|
||||||
|
|
||||||
try: # Python 3.8
|
|
||||||
import importlib.metadata as importlib_metadata
|
|
||||||
except ImportError:
|
|
||||||
from catalogue import _importlib_metadata as importlib_metadata
|
|
||||||
|
|
||||||
# These are functions that were previously (v2.x) available from spacy.util
|
# These are functions that were previously (v2.x) available from spacy.util
|
||||||
# and have since moved to Thinc. We're importing them here so people's code
|
# and have since moved to Thinc. We're importing them here so people's code
|
||||||
# doesn't break, but they should always be imported from Thinc from now on,
|
# doesn't break, but they should always be imported from Thinc from now on,
|
||||||
|
@ -46,7 +43,7 @@ from thinc.api import fix_random_seed, compounding, decaying # noqa: F401
|
||||||
|
|
||||||
|
|
||||||
from .symbols import ORTH
|
from .symbols import ORTH
|
||||||
from .compat import cupy, CudaStream, is_windows
|
from .compat import cupy, CudaStream, is_windows, importlib_metadata
|
||||||
from .errors import Errors, Warnings, OLD_MODEL_SHORTCUTS
|
from .errors import Errors, Warnings, OLD_MODEL_SHORTCUTS
|
||||||
from . import about
|
from . import about
|
||||||
|
|
||||||
|
@ -639,13 +636,18 @@ def is_unconstrained_version(
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def get_model_version_range(spacy_version: str) -> str:
|
def split_requirement(requirement: str) -> Tuple[str, str]:
    """Split a requirement string into its package name and version specifier,
    e.g. spacy>=1.2.3 becomes ("spacy", ">=1.2.3").

    requirement (str): The requirement string to parse.
    RETURNS (Tuple[str, str]): The package name and the specifier as a string.
    """
    parsed = Requirement(requirement)
    name = parsed.name
    specifier = str(parsed.specifier)
    return (name, specifier)
|
||||||
|
|
||||||
|
|
||||||
|
def get_minor_version_range(version: str) -> str:
    """Generate a version range like >=1.2.3,<1.3.0 based on a given version
    (e.g. of spaCy). The range starts at the given version and ends before the
    next minor release.

    version (str): The version to base the range on, e.g. "1.2.3".
    RETURNS (str): The version range, e.g. ">=1.2.3,<1.3.0".
    """
    release = Version(version).release
    major = release[0]
    # Third-party packages may use versions with a single release segment
    # (e.g. "3" or "2021"). Treat a missing minor segment as 0 instead of
    # failing with an IndexError.
    minor = release[1] if len(release) > 1 else 0
    return f">={version},<{major}.{minor + 1}.0"
|
||||||
|
|
||||||
|
|
||||||
def get_model_lower_version(constraint: str) -> Optional[str]:
|
def get_model_lower_version(constraint: str) -> Optional[str]:
|
||||||
|
@ -733,7 +735,7 @@ def load_meta(path: Union[str, Path]) -> Dict[str, Any]:
|
||||||
model=f"{meta['lang']}_{meta['name']}",
|
model=f"{meta['lang']}_{meta['name']}",
|
||||||
model_version=meta["version"],
|
model_version=meta["version"],
|
||||||
version=meta["spacy_version"],
|
version=meta["spacy_version"],
|
||||||
example=get_model_version_range(about.__version__),
|
example=get_minor_version_range(about.__version__),
|
||||||
)
|
)
|
||||||
warnings.warn(warn_msg)
|
warnings.warn(warn_msg)
|
||||||
return meta
|
return meta
|
||||||
|
@ -1549,3 +1551,19 @@ def to_ternary_int(val) -> int:
|
||||||
return 0
|
return 0
|
||||||
else:
|
else:
|
||||||
return -1
|
return -1
|
||||||
|
|
||||||
|
|
||||||
|
# The following implementation of packages_distributions() is adapted from
|
||||||
|
# importlib_metadata, which is distributed under the Apache 2.0 License.
|
||||||
|
# Copyright (c) 2017-2019 Jason R. Coombs, Barry Warsaw
|
||||||
|
# See licenses/3rd_party_licenses.txt
|
||||||
|
def packages_distributions() -> Dict[str, List[str]]:
    """Build a mapping of top-level package names to the names of the
    distributions that declare them in their top_level.txt. This helper is
    inlined from the importlib_metadata "backport", since it's not available
    in the builtin importlib.metadata.

    RETURNS (Dict[str, List[str]]): Distribution names keyed by top-level
        package name.
    """
    mapping: Dict[str, List[str]] = {}
    for distribution in importlib_metadata.distributions():
        top_level = distribution.read_text("top_level.txt")
        if not top_level:  # no top_level.txt, or nothing listed in it
            continue
        for package in top_level.split():
            mapping.setdefault(package, []).append(distribution.metadata["Name"])
    return mapping
|
||||||
|
|
Loading…
Reference in New Issue
Block a user