From 1b5aba9e220a6081f006b08929eabc50b0be6c4b Mon Sep 17 00:00:00 2001 From: Paul O'Leary McCann Date: Tue, 31 Jan 2023 19:31:17 +0900 Subject: [PATCH] Don't re-download installed models (#12188) * Don't re-download installed models When downloading a model, this checks if the same version of the same model is already installed. If it is, then the download is skipped. This is necessary because pip uses the final download URL for its caching feature, but because of the way models are hosted on GitHub, their URLs change every few minutes. * Use importlib instead of meta.json * Use get_package_version * Add untested, disabled test --------- Co-authored-by: Adriane Boyd --- .github/azure-steps.yml | 5 +++++ spacy/cli/download.py | 11 ++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/.github/azure-steps.yml b/.github/azure-steps.yml index d0db75f9a..7c3c3e0a6 100644 --- a/.github/azure-steps.yml +++ b/.github/azure-steps.yml @@ -62,6 +62,11 @@ steps: # - script: | # python -W error -c "import ca_core_news_sm; nlp = ca_core_news_sm.load(); doc=nlp('test')" # displayName: 'Test no warnings on load (#11713)' +# condition: eq(variables['python_version'], '3.8') +# +# - script: | +# python -m spacy download ca_core_news_sm 2>&1 | grep -q skipping +# displayName: 'Test skip re-download (#12188)' # condition: eq(variables['python_version'], '3.8') - script: | diff --git a/spacy/cli/download.py b/spacy/cli/download.py index 4c998a6e0..90471c55e 100644 --- a/spacy/cli/download.py +++ b/spacy/cli/download.py @@ -7,7 +7,8 @@ import typer from ._util import app, Arg, Opt, WHEEL_SUFFIX, SDIST_SUFFIX from .. 
import about from ..util import is_package, get_minor_version, run_command -from ..util import is_prerelease_version +from ..util import is_prerelease_version, get_installed_models +from ..util import get_package_version @app.command( @@ -63,6 +64,14 @@ def download( compatibility = get_compatibility() version = get_version(model_name, compatibility) + # If we already have this version installed, skip downloading + installed = get_installed_models() + if model_name in installed: + installed_version = get_package_version(model_name) + if installed_version == version: + msg.warn(f"{model_name} v{version} already installed, skipping") + return + filename = get_model_filename(model_name, version, sdist) download_model(filename, pip_args)