From a43e6d47b2602cd2e12013686b1501d2eb9b59fc Mon Sep 17 00:00:00 2001 From: svlandeg Date: Mon, 27 Mar 2023 18:52:03 +0200 Subject: [PATCH] remove get_hash and get_checksum --- spacy/cli/_util.py | 33 --------------------------------- 1 file changed, 33 deletions(-) diff --git a/spacy/cli/_util.py b/spacy/cli/_util.py index dc6f501d4..2b6dd330f 100644 --- a/spacy/cli/_util.py +++ b/spacy/cli/_util.py @@ -131,39 +131,6 @@ def _parse_override(value: Any) -> Any: return str(value) -def get_hash(data, exclude: Iterable[str] = tuple()) -> str: - """Get the hash for a JSON-serializable object. - - data: The data to hash. - exclude (Iterable[str]): Top-level keys to exclude if data is a dict. - RETURNS (str): The hash. - """ - if isinstance(data, dict): - data = {k: v for k, v in data.items() if k not in exclude} - data_str = srsly.json_dumps(data, sort_keys=True).encode("utf8") - return hashlib.md5(data_str).hexdigest() - - -def get_checksum(path: Union[Path, str]) -> str: - """Get the checksum for a file or directory given its file path. If a - directory path is provided, this uses all files in that directory. - - path (Union[Path, str]): The file or directory path. - RETURNS (str): The checksum. - """ - path = Path(path) - if not (path.is_file() or path.is_dir()): - msg.fail(f"Can't get checksum for {path}: not a file or directory", exits=1) - if path.is_file(): - return hashlib.md5(Path(path).read_bytes()).hexdigest() - else: - # TODO: this is currently pretty slow - dir_checksum = hashlib.md5() - for sub_file in sorted(fp for fp in path.rglob("*") if fp.is_file()): - dir_checksum.update(sub_file.read_bytes()) - return dir_checksum.hexdigest() - - @contextmanager def show_validation_error( file_path: Optional[Union[str, Path]] = None,