From ed47384da194a37d695d4eccbc755b4c04fa51de Mon Sep 17 00:00:00 2001 From: Paul O'Leary McCann Date: Thu, 26 Jan 2023 19:13:50 +0900 Subject: [PATCH] Don't re-download installed models When downloading a model, this checks if the same version of the same model is already installed. If it is, the download is skipped. This is necessary because pip uses the final download URL for its caching feature, but because of the way models are hosted on GitHub, their URLs change every few minutes. --- spacy/cli/download.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/spacy/cli/download.py b/spacy/cli/download.py index 4c998a6e0..45888a9b9 100644 --- a/spacy/cli/download.py +++ b/spacy/cli/download.py @@ -3,11 +3,12 @@ import requests import sys from wasabi import msg import typer +import srsly from ._util import app, Arg, Opt, WHEEL_SUFFIX, SDIST_SUFFIX from .. import about from ..util import is_package, get_minor_version, run_command -from ..util import is_prerelease_version +from ..util import is_prerelease_version, get_installed_models, get_package_path @app.command( @@ -63,6 +64,16 @@ def download( compatibility = get_compatibility() version = get_version(model_name, compatibility) + # If we already have this version installed, skip downloading + installed = get_installed_models() + if model_name in installed: + model_path = get_package_path(model_name) + meta_path = model_path / "meta.json" + meta = srsly.read_json(meta_path) + if meta["version"] == version: + msg.warn(f"{model_name} v{version} already installed, skipping") + return + filename = get_model_filename(model_name, version, sdist) download_model(filename, pip_args)