Don't re-download installed models (#12188)

* Don't re-download installed models

When downloading a model, this checks whether the same version of that
model is already installed. If it is, the download is skipped.

This is necessary because pip uses the final download URL for its
caching feature, but because of the way models are hosted on GitHub,
their URLs change every few minutes, so the cached download is
effectively never reused.

* Use importlib instead of meta.json

* Use get_package_version

* Add untested, disabled test

---------

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
This commit is contained in:
Paul O'Leary McCann 2023-01-31 19:31:17 +09:00 committed by GitHub
parent 6b07be2110
commit 1b5aba9e22
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 15 additions and 1 deletions

View File

@ -62,6 +62,11 @@ steps:
# - script: | # - script: |
# python -W error -c "import ca_core_news_sm; nlp = ca_core_news_sm.load(); doc=nlp('test')" # python -W error -c "import ca_core_news_sm; nlp = ca_core_news_sm.load(); doc=nlp('test')"
# displayName: 'Test no warnings on load (#11713)' # displayName: 'Test no warnings on load (#11713)'
# condition: eq(variables['python_version'], '3.8')
#
# - script: |
# python -m spacy download ca_core_news_sm 2>&1 | grep -q skipping
# displayName: 'Test skip re-download (#12188)'
# condition: eq(variables['python_version'], '3.8') # condition: eq(variables['python_version'], '3.8')
- script: | - script: |

View File

@ -7,7 +7,8 @@ import typer
from ._util import app, Arg, Opt, WHEEL_SUFFIX, SDIST_SUFFIX from ._util import app, Arg, Opt, WHEEL_SUFFIX, SDIST_SUFFIX
from .. import about from .. import about
from ..util import is_package, get_minor_version, run_command from ..util import is_package, get_minor_version, run_command
from ..util import is_prerelease_version from ..util import is_prerelease_version, get_installed_models
from ..util import get_package_version
@app.command( @app.command(
@ -63,6 +64,14 @@ def download(
compatibility = get_compatibility() compatibility = get_compatibility()
version = get_version(model_name, compatibility) version = get_version(model_name, compatibility)
# If we already have this version installed, skip downloading
installed = get_installed_models()
if model_name in installed:
installed_version = get_package_version(model_name)
if installed_version == version:
msg.warn(f"{model_name} v{version} already installed, skipping")
return
filename = get_model_filename(model_name, version, sdist) filename = get_model_filename(model_name, version, sdist)
download_model(filename, pip_args) download_model(filename, pip_args)