mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-25 00:34:20 +03:00
Include spaCy version check in project CLI
This commit is contained in:
parent
20f2a17a09
commit
6958510bda
|
@ -1,4 +1,4 @@
|
|||
from typing import Dict, Any, Union, List, Optional, Tuple, TYPE_CHECKING
|
||||
from typing import Dict, Any, Union, List, Optional, Tuple, Iterable, TYPE_CHECKING
|
||||
import sys
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
@ -193,12 +193,15 @@ def validate_project_commands(config: Dict[str, Any]) -> None:
|
|||
)
|
||||
|
||||
|
||||
def get_hash(data) -> str:
|
||||
def get_hash(data, exclude: Iterable[str] = tuple()) -> str:
|
||||
"""Get the hash for a JSON-serializable object.
|
||||
|
||||
data: The data to hash.
|
||||
exclude (Iterable[str]): Top-level keys to exclude if data is a dict.
|
||||
RETURNS (str): The hash.
|
||||
"""
|
||||
if isinstance(data, dict):
|
||||
data = {k: v for k, v in data.items() if k not in exclude}
|
||||
data_str = srsly.json_dumps(data, sort_keys=True).encode("utf8")
|
||||
return hashlib.md5(data_str).hexdigest()
|
||||
|
||||
|
|
|
@ -7,7 +7,8 @@ import tarfile
|
|||
from pathlib import Path
|
||||
|
||||
from .._util import get_hash, get_checksum, download_file, ensure_pathy
|
||||
from ...util import make_tempdir
|
||||
from ...util import make_tempdir, get_minor_version
|
||||
from ... import about
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pathy import Pathy # noqa: F401
|
||||
|
@ -129,7 +130,9 @@ def get_command_hash(
|
|||
currently installed packages, whatever environment variables have been marked
|
||||
as relevant, and the command.
|
||||
"""
|
||||
hashes = [site_hash, env_hash] + [get_checksum(dep) for dep in sorted(deps)]
|
||||
spacy_v = get_minor_version(about.__version__)
|
||||
dep_checksums = [get_checksum(dep) for dep in sorted(deps)]
|
||||
hashes = [spacy_v, site_hash, env_hash] + dep_checksums
|
||||
hashes.extend(cmd)
|
||||
creation_bytes = "".join(hashes).encode("utf8")
|
||||
return hashlib.md5(creation_bytes).hexdigest()
|
||||
|
|
|
@ -4,8 +4,10 @@ from wasabi import msg
|
|||
import sys
|
||||
import srsly
|
||||
|
||||
from ... import about
|
||||
from ...git_info import GIT_VERSION
|
||||
from ...util import working_dir, run_command, split_command, is_cwd, join_command
|
||||
from ...util import SimpleFrozenList
|
||||
from ...util import SimpleFrozenList, is_minor_version_match
|
||||
from .._util import PROJECT_FILE, PROJECT_LOCK, load_project_config, get_hash
|
||||
from .._util import get_checksum, project_cli, Arg, Opt, COMMAND
|
||||
|
||||
|
@ -63,11 +65,11 @@ def project_run(
|
|||
err_kwargs = {"exits": 1} if not dry else {}
|
||||
msg.fail(err, err_help, **err_kwargs)
|
||||
with working_dir(project_dir) as current_dir:
|
||||
msg.divider(subcommand)
|
||||
rerun = check_rerun(current_dir, cmd)
|
||||
if not rerun and not force:
|
||||
msg.info(f"Skipping '{cmd['name']}': nothing changed")
|
||||
else:
|
||||
msg.divider(subcommand)
|
||||
run_commands(cmd["script"], dry=dry)
|
||||
if not dry:
|
||||
update_lockfile(current_dir, cmd)
|
||||
|
@ -171,12 +173,18 @@ def validate_subcommand(
|
|||
)
|
||||
|
||||
|
||||
def check_rerun(project_dir: Path, command: Dict[str, Any]) -> bool:
|
||||
def check_rerun(
|
||||
project_dir: Path,
|
||||
command: Dict[str, Any],
|
||||
check_spacy_version: bool = True,
|
||||
check_spacy_commit: bool = False,
|
||||
) -> bool:
|
||||
"""Check if a command should be rerun because its settings or inputs/outputs
|
||||
changed.
|
||||
|
||||
project_dir (Path): The current project directory.
|
||||
command (Dict[str, Any]): The command, as defined in the project.yml.
|
||||
strict_version (bool):
|
||||
RETURNS (bool): Whether to re-run the command.
|
||||
"""
|
||||
lock_path = project_dir / PROJECT_LOCK
|
||||
|
@ -189,10 +197,23 @@ def check_rerun(project_dir: Path, command: Dict[str, Any]) -> bool:
|
|||
# Always run commands with no outputs (otherwise they'd always be skipped)
|
||||
if not entry.get("outs", []):
|
||||
return True
|
||||
# Always rerun if spaCy version or commit hash changed
|
||||
spacy_v = entry.get("spacy_version")
|
||||
commit = entry.get("spacy_git_version")
|
||||
if check_spacy_version and not is_minor_version_match(spacy_v, about.__version__):
|
||||
info = f"({spacy_v} in {PROJECT_LOCK}, {about.__version__} current)"
|
||||
msg.info(f"Re-running '{command['name']}': spaCy minor version changed {info}")
|
||||
return True
|
||||
if check_spacy_commit and commit != GIT_VERSION:
|
||||
info = f"({commit} in {PROJECT_LOCK}, {GIT_VERSION} current)"
|
||||
msg.info(f"Re-running '{command['name']}': spaCy commit changed {info}")
|
||||
return True
|
||||
# If the entry in the lockfile matches the lockfile entry that would be
|
||||
# generated from the current command, we don't rerun because it means that
|
||||
# all inputs/outputs, hashes and scripts are the same and nothing changed
|
||||
return get_hash(get_lock_entry(project_dir, command)) != get_hash(entry)
|
||||
lock_entry = get_lock_entry(project_dir, command)
|
||||
exclude = ["spacy_version", "spacy_git_version"]
|
||||
return get_hash(lock_entry, exclude=exclude) != get_hash(entry, exclude=exclude)
|
||||
|
||||
|
||||
def update_lockfile(project_dir: Path, command: Dict[str, Any]) -> None:
|
||||
|
@ -231,6 +252,8 @@ def get_lock_entry(project_dir: Path, command: Dict[str, Any]) -> Dict[str, Any]
|
|||
"script": command["script"],
|
||||
"deps": deps,
|
||||
"outs": [*outs, *outs_nc],
|
||||
"spacy_version": about.__version__,
|
||||
"spacy_git_version": GIT_VERSION,
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -149,6 +149,21 @@ def test_is_unconstrained_version(constraint, expected):
|
|||
assert util.is_unconstrained_version(constraint) is expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"a1,a2,b1,b2,is_match",
|
||||
[
|
||||
("3.0.0", "3.0", "3.0.1", "3.0", True),
|
||||
("3.1.0", "3.1", "3.2.1", "3.2", False),
|
||||
("xxx", None, "1.2.3.dev0", "1.2", False),
|
||||
],
|
||||
)
|
||||
def test_minor_version(a1, a2, b1, b2, is_match):
|
||||
assert util.get_minor_version(a1) == a2
|
||||
assert util.get_minor_version(b1) == b2
|
||||
assert util.is_minor_version_match(a1, b1) is is_match
|
||||
assert util.is_minor_version_match(a2, b2) is is_match
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"dot_notation,expected",
|
||||
[
|
||||
|
|
|
@ -584,6 +584,33 @@ def get_base_version(version: str) -> str:
|
|||
return Version(version).base_version
|
||||
|
||||
|
||||
def get_minor_version(version: str) -> Optional[str]:
|
||||
"""Get the major + minor version (without patch or prerelease identifiers).
|
||||
|
||||
version (str): The version.
|
||||
RETURNS (str): The major + minor version or None if version is invalid.
|
||||
"""
|
||||
try:
|
||||
v = Version(version)
|
||||
except (TypeError, InvalidVersion):
|
||||
return None
|
||||
return f"{v.major}.{v.minor}"
|
||||
|
||||
|
||||
def is_minor_version_match(version_a: str, version_b: str) -> bool:
|
||||
"""Compare two versions and check if they match in major and minor, without
|
||||
patch or prerelease identifiers. Used internally for compatibility checks
|
||||
that should be insensitive to patch releases.
|
||||
|
||||
version_a (str): The first version
|
||||
version_b (str): The second version.
|
||||
RETURNS (bool): Whether the versions match.
|
||||
"""
|
||||
a = get_minor_version(version_a)
|
||||
b = get_minor_version(version_b)
|
||||
return a is not None and b is not None and a == b
|
||||
|
||||
|
||||
def load_meta(path: Union[str, Path]) -> Dict[str, Any]:
|
||||
"""Load a model meta.json from a path and validate its contents.
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user