mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 01:04:34 +03:00
Add a way to get the URL to download a pipeline to the CLI (#11175)
* Add a dry run flag to download * Remove --dry-run, add --url option to `spacy info` instead * Make mypy happy * Print only the URL, so it's easier to use in scripts * Don't add the egg hash unless downloading an sdist * Update spacy/cli/info.py Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com> * Add two implementations of requirements * Clean up requirements sample slightly This should make mypy happy * Update URL help string * Remove requirements option * Add url option to docs * Add URL to spacy info model output, when available * Add types-setuptools to testing reqs * Add types-setuptools to requirements * Add "compatible", expand docstring * Update spacy/cli/info.py Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com> * Run prettier on CLI docs * Update docs Add a sidebar about finding download URLs, with some examples of the new command. * Add download URLs to table on model page * Apply suggestions from code review Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com> * Updates from review * download url -> download link * Update docs Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com> Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
This commit is contained in:
parent
71884d0942
commit
977dc33312
|
@ -34,4 +34,5 @@ mypy>=0.910,<0.970; platform_machine!='aarch64'
|
||||||
types-dataclasses>=0.1.3; python_version < "3.7"
|
types-dataclasses>=0.1.3; python_version < "3.7"
|
||||||
types-mock>=0.1.1
|
types-mock>=0.1.1
|
||||||
types-requests
|
types-requests
|
||||||
|
types-setuptools>=57.0.0
|
||||||
black>=22.0,<23.0
|
black>=22.0,<23.0
|
||||||
|
|
|
@ -20,7 +20,7 @@ def download_cli(
|
||||||
ctx: typer.Context,
|
ctx: typer.Context,
|
||||||
model: str = Arg(..., help="Name of pipeline package to download"),
|
model: str = Arg(..., help="Name of pipeline package to download"),
|
||||||
direct: bool = Opt(False, "--direct", "-d", "-D", help="Force direct download of name + version"),
|
direct: bool = Opt(False, "--direct", "-d", "-D", help="Force direct download of name + version"),
|
||||||
sdist: bool = Opt(False, "--sdist", "-S", help="Download sdist (.tar.gz) archive instead of pre-built binary wheel")
|
sdist: bool = Opt(False, "--sdist", "-S", help="Download sdist (.tar.gz) archive instead of pre-built binary wheel"),
|
||||||
# fmt: on
|
# fmt: on
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
|
@ -36,7 +36,12 @@ def download_cli(
|
||||||
download(model, direct, sdist, *ctx.args)
|
download(model, direct, sdist, *ctx.args)
|
||||||
|
|
||||||
|
|
||||||
def download(model: str, direct: bool = False, sdist: bool = False, *pip_args) -> None:
|
def download(
|
||||||
|
model: str,
|
||||||
|
direct: bool = False,
|
||||||
|
sdist: bool = False,
|
||||||
|
*pip_args,
|
||||||
|
) -> None:
|
||||||
if (
|
if (
|
||||||
not (is_package("spacy") or is_package("spacy-nightly"))
|
not (is_package("spacy") or is_package("spacy-nightly"))
|
||||||
and "--no-deps" not in pip_args
|
and "--no-deps" not in pip_args
|
||||||
|
@ -50,13 +55,10 @@ def download(model: str, direct: bool = False, sdist: bool = False, *pip_args) -
|
||||||
"dependencies, you'll have to install them manually."
|
"dependencies, you'll have to install them manually."
|
||||||
)
|
)
|
||||||
pip_args = pip_args + ("--no-deps",)
|
pip_args = pip_args + ("--no-deps",)
|
||||||
suffix = SDIST_SUFFIX if sdist else WHEEL_SUFFIX
|
|
||||||
dl_tpl = "{m}-{v}/{m}-{v}{s}#egg={m}=={v}"
|
|
||||||
if direct:
|
if direct:
|
||||||
components = model.split("-")
|
components = model.split("-")
|
||||||
model_name = "".join(components[:-1])
|
model_name = "".join(components[:-1])
|
||||||
version = components[-1]
|
version = components[-1]
|
||||||
download_model(dl_tpl.format(m=model_name, v=version, s=suffix), pip_args)
|
|
||||||
else:
|
else:
|
||||||
model_name = model
|
model_name = model
|
||||||
if model in OLD_MODEL_SHORTCUTS:
|
if model in OLD_MODEL_SHORTCUTS:
|
||||||
|
@ -67,13 +69,26 @@ def download(model: str, direct: bool = False, sdist: bool = False, *pip_args) -
|
||||||
model_name = OLD_MODEL_SHORTCUTS[model]
|
model_name = OLD_MODEL_SHORTCUTS[model]
|
||||||
compatibility = get_compatibility()
|
compatibility = get_compatibility()
|
||||||
version = get_version(model_name, compatibility)
|
version = get_version(model_name, compatibility)
|
||||||
download_model(dl_tpl.format(m=model_name, v=version, s=suffix), pip_args)
|
|
||||||
|
filename = get_model_filename(model_name, version, sdist)
|
||||||
|
|
||||||
|
download_model(filename, pip_args)
|
||||||
msg.good(
|
msg.good(
|
||||||
"Download and installation successful",
|
"Download and installation successful",
|
||||||
f"You can now load the package via spacy.load('{model_name}')",
|
f"You can now load the package via spacy.load('{model_name}')",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def get_model_filename(model_name: str, version: str, sdist: bool = False) -> str:
    """Build the download path of a pipeline package archive.

    The result has the form "{name}-{version}/{name}-{version}{suffix}" and is
    meant to be appended to a download base URL.

    model_name (str): Name of the pipeline package.
    version (str): Version of the pipeline package.
    sdist (bool): Use the sdist suffix instead of the wheel suffix.
    RETURNS (str): The archive path. For sdists, an "#egg=name==version"
        fragment is appended; wheels get no fragment.
    """
    archive_suffix = SDIST_SUFFIX if sdist else WHEEL_SUFFIX
    path = f"{model_name}-{version}/{model_name}-{version}{archive_suffix}"
    if not sdist:
        return path
    # The egg fragment is only added when pointing at an sdist archive.
    return path + f"#egg={model_name}=={version}"
|
||||||
|
|
||||||
|
|
||||||
def get_compatibility() -> dict:
|
def get_compatibility() -> dict:
|
||||||
if is_prerelease_version(about.__version__):
|
if is_prerelease_version(about.__version__):
|
||||||
version: Optional[str] = about.__version__
|
version: Optional[str] = about.__version__
|
||||||
|
@ -105,6 +120,11 @@ def get_version(model: str, comp: dict) -> str:
|
||||||
return comp[model][0]
|
return comp[model][0]
|
||||||
|
|
||||||
|
|
||||||
|
def get_latest_version(model: str) -> str:
    """Resolve the version of a pipeline package from the compatibility table.

    Fetches the table via get_compatibility() and delegates the actual
    selection to get_version().

    model (str): Name of the pipeline package.
    RETURNS (str): The version string chosen by get_version().
    """
    return get_version(model, get_compatibility())
|
||||||
|
|
||||||
|
|
||||||
def download_model(
|
def download_model(
|
||||||
filename: str, user_pip_args: Optional[Sequence[str]] = None
|
filename: str, user_pip_args: Optional[Sequence[str]] = None
|
||||||
) -> None:
|
) -> None:
|
||||||
|
|
|
@ -1,10 +1,13 @@
|
||||||
from typing import Optional, Dict, Any, Union, List
|
from typing import Optional, Dict, Any, Union, List
|
||||||
import platform
|
import platform
|
||||||
|
import pkg_resources
|
||||||
|
import json
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from wasabi import Printer, MarkdownRenderer
|
from wasabi import Printer, MarkdownRenderer
|
||||||
import srsly
|
import srsly
|
||||||
|
|
||||||
from ._util import app, Arg, Opt, string_to_list
|
from ._util import app, Arg, Opt, string_to_list
|
||||||
|
from .download import get_model_filename, get_latest_version
|
||||||
from .. import util
|
from .. import util
|
||||||
from .. import about
|
from .. import about
|
||||||
|
|
||||||
|
@ -16,6 +19,7 @@ def info_cli(
|
||||||
markdown: bool = Opt(False, "--markdown", "-md", help="Generate Markdown for GitHub issues"),
|
markdown: bool = Opt(False, "--markdown", "-md", help="Generate Markdown for GitHub issues"),
|
||||||
silent: bool = Opt(False, "--silent", "-s", "-S", help="Don't print anything (just return)"),
|
silent: bool = Opt(False, "--silent", "-s", "-S", help="Don't print anything (just return)"),
|
||||||
exclude: str = Opt("labels", "--exclude", "-e", help="Comma-separated keys to exclude from the print-out"),
|
exclude: str = Opt("labels", "--exclude", "-e", help="Comma-separated keys to exclude from the print-out"),
|
||||||
|
url: bool = Opt(False, "--url", "-u", help="Print the URL to download the most recent compatible version of the pipeline"),
|
||||||
# fmt: on
|
# fmt: on
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
|
@ -23,10 +27,19 @@ def info_cli(
|
||||||
print its meta information. Flag --markdown prints details in Markdown for easy
|
print its meta information. Flag --markdown prints details in Markdown for easy
|
||||||
copy-pasting to GitHub issues.
|
copy-pasting to GitHub issues.
|
||||||
|
|
||||||
|
Flag --url prints only the download URL of the most recent compatible
|
||||||
|
version of the pipeline.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/cli#info
|
DOCS: https://spacy.io/api/cli#info
|
||||||
"""
|
"""
|
||||||
exclude = string_to_list(exclude)
|
exclude = string_to_list(exclude)
|
||||||
info(model, markdown=markdown, silent=silent, exclude=exclude)
|
info(
|
||||||
|
model,
|
||||||
|
markdown=markdown,
|
||||||
|
silent=silent,
|
||||||
|
exclude=exclude,
|
||||||
|
url=url,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def info(
|
def info(
|
||||||
|
@ -35,11 +48,20 @@ def info(
|
||||||
markdown: bool = False,
|
markdown: bool = False,
|
||||||
silent: bool = True,
|
silent: bool = True,
|
||||||
exclude: Optional[List[str]] = None,
|
exclude: Optional[List[str]] = None,
|
||||||
|
url: bool = False,
|
||||||
) -> Union[str, dict]:
|
) -> Union[str, dict]:
|
||||||
msg = Printer(no_print=silent, pretty=not silent)
|
msg = Printer(no_print=silent, pretty=not silent)
|
||||||
if not exclude:
|
if not exclude:
|
||||||
exclude = []
|
exclude = []
|
||||||
if model:
|
if url:
|
||||||
|
if model is not None:
|
||||||
|
title = f"Download info for pipeline '{model}'"
|
||||||
|
data = info_model_url(model)
|
||||||
|
print(data["download_url"])
|
||||||
|
return data
|
||||||
|
else:
|
||||||
|
msg.fail("--url option requires a pipeline name", exits=1)
|
||||||
|
elif model:
|
||||||
title = f"Info about pipeline '{model}'"
|
title = f"Info about pipeline '{model}'"
|
||||||
data = info_model(model, silent=silent)
|
data = info_model(model, silent=silent)
|
||||||
else:
|
else:
|
||||||
|
@ -99,11 +121,43 @@ def info_model(model: str, *, silent: bool = True) -> Dict[str, Any]:
|
||||||
meta["source"] = str(model_path.resolve())
|
meta["source"] = str(model_path.resolve())
|
||||||
else:
|
else:
|
||||||
meta["source"] = str(model_path)
|
meta["source"] = str(model_path)
|
||||||
|
download_url = info_installed_model_url(model)
|
||||||
|
if download_url:
|
||||||
|
meta["download_url"] = download_url
|
||||||
return {
|
return {
|
||||||
k: v for k, v in meta.items() if k not in ("accuracy", "performance", "speed")
|
k: v for k, v in meta.items() if k not in ("accuracy", "performance", "speed")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def info_installed_model_url(model: str) -> Optional[str]:
    """Look up the URL an installed pipeline package was installed from.

    The URL is read from the "url" entry of the distribution's
    "direct_url.json" metadata file, so it can only be reported for pipelines
    installed as packages that ship this metadata.

    model (str): Name of the installed pipeline package.
    RETURNS (Optional[str]): The recorded URL, or None if the package is not
        installed or the metadata cannot be read.
    """
    try:
        distribution = pkg_resources.get_distribution(model)
        direct_url = json.loads(distribution.get_metadata("direct_url.json"))
        url = direct_url["url"]
    except pkg_resources.DistributionNotFound:
        # The package is not installed in this environment.
        return None
    except Exception:
        # Best effort: a missing metadata file, invalid JSON or a missing
        # "url" key all simply mean that no URL is available.
        return None
    return url
|
||||||
|
|
||||||
|
def info_model_url(model: str) -> Dict[str, Any]:
    """Collect the download and release URLs for a pipeline package.

    The version is resolved with get_latest_version() and the wheel archive
    path is built with get_model_filename().

    model (str): Name of the pipeline package.
    RETURNS (Dict[str, Any]): A dict with the keys "download_url" (archive URL
        below about.__download_url__) and "release_url" (GitHub release page
        for this package version).
    """
    latest = get_latest_version(model)
    archive_path = get_model_filename(model, latest)
    releases_root = "https://github.com/explosion/spacy-models/releases/tag"
    return {
        "download_url": "/".join((about.__download_url__, archive_path)),
        "release_url": f"{releases_root}/{model}-{latest}",
    }
|
||||||
|
|
||||||
|
|
||||||
def get_markdown(
|
def get_markdown(
|
||||||
data: Dict[str, Any],
|
data: Dict[str, Any],
|
||||||
title: Optional[str] = None,
|
title: Optional[str] = None,
|
||||||
|
|
|
@ -17,6 +17,7 @@ def test_build_dependencies():
|
||||||
"types-dataclasses",
|
"types-dataclasses",
|
||||||
"types-mock",
|
"types-mock",
|
||||||
"types-requests",
|
"types-requests",
|
||||||
|
"types-setuptools",
|
||||||
]
|
]
|
||||||
# ignore language-specific packages that shouldn't be installed by all
|
# ignore language-specific packages that shouldn't be installed by all
|
||||||
libs_ignore_setup = [
|
libs_ignore_setup = [
|
||||||
|
|
|
@ -78,11 +78,12 @@ $ python -m spacy info [model] [--markdown] [--silent] [--exclude]
|
||||||
```
|
```
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| ------------------------------------------------ | --------------------------------------------------------------------------------------------- |
|
| ------------------------------------------------ | ----------------------------------------------------------------------------------------------------------------------- |
|
||||||
| `model` | A trained pipeline, i.e. package name or path (optional). ~~Optional[str] \(option)~~ |
|
| `model` | A trained pipeline, i.e. package name or path (optional). ~~Optional[str] \(option)~~ |
|
||||||
| `--markdown`, `-md` | Print information as Markdown. ~~bool (flag)~~ |
|
| `--markdown`, `-md` | Print information as Markdown. ~~bool (flag)~~ |
|
||||||
| `--silent`, `-s` <Tag variant="new">2.0.12</Tag> | Don't print anything, just return the values. ~~bool (flag)~~ |
|
| `--silent`, `-s` <Tag variant="new">2.0.12</Tag> | Don't print anything, just return the values. ~~bool (flag)~~ |
|
||||||
| `--exclude`, `-e` | Comma-separated keys to exclude from the print-out. Defaults to `"labels"`. ~~Optional[str]~~ |
|
| `--exclude`, `-e` | Comma-separated keys to exclude from the print-out. Defaults to `"labels"`. ~~Optional[str]~~ |
|
||||||
|
| `--url`, `-u` <Tag variant="new">3.5.0</Tag> | Print the URL to download the most recent compatible version of the pipeline. Requires a pipeline name. ~~bool (flag)~~ |
|
||||||
| `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ |
|
| `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ |
|
||||||
| **PRINTS** | Information about your spaCy installation. |
|
| **PRINTS** | Information about your spaCy installation. |
|
||||||
|
|
||||||
|
|
|
@ -365,15 +365,32 @@ pipeline package can be found.
|
||||||
To download a trained pipeline directly using
|
To download a trained pipeline directly using
|
||||||
[pip](https://pypi.python.org/pypi/pip), point `pip install` to the URL or local
|
[pip](https://pypi.python.org/pypi/pip), point `pip install` to the URL or local
|
||||||
path of the wheel file or archive. Installing the wheel is usually more
|
path of the wheel file or archive. Installing the wheel is usually more
|
||||||
efficient. To find the direct link to a package, head over to the
|
efficient.
|
||||||
[releases](https://github.com/explosion/spacy-models/releases), right click on
|
|
||||||
the archive link and copy it to your clipboard.
|
> #### Pipeline Package URLs {#pipeline-urls}
|
||||||
|
>
|
||||||
|
> Pretrained pipeline distributions are hosted on
|
||||||
|
> [GitHub Releases](https://github.com/explosion/spacy-models/releases), and you
|
||||||
|
> can find download links there, as well as on the model page. You can also get
|
||||||
|
> URLs directly from the command line by using `spacy info` with the `--url`
|
||||||
|
> flag, which may be useful for automation.
|
||||||
|
>
|
||||||
|
> ```bash
|
||||||
|
> spacy info en_core_web_sm --url
|
||||||
|
> ```
|
||||||
|
>
|
||||||
|
> This command will print the URL for the latest version of a pipeline
|
||||||
|
> compatible with the version of spaCy you're using. Note that in order to look
|
||||||
|
> up the compatibility information an internet connection is required.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# With external URL
|
# With external URL
|
||||||
$ pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0-py3-none-any.whl
|
$ pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0-py3-none-any.whl
|
||||||
$ pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz
|
$ pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz
|
||||||
|
|
||||||
|
# Using spacy info to get the external URL
|
||||||
|
$ pip install $(spacy info en_core_web_sm --url)
|
||||||
|
|
||||||
# With local file
|
# With local file
|
||||||
$ pip install /Users/you/en_core_web_sm-3.0.0-py3-none-any.whl
|
$ pip install /Users/you/en_core_web_sm-3.0.0-py3-none-any.whl
|
||||||
$ pip install /Users/you/en_core_web_sm-3.0.0.tar.gz
|
$ pip install /Users/you/en_core_web_sm-3.0.0.tar.gz
|
||||||
|
@ -514,21 +531,16 @@ should be specifying them directly.
|
||||||
Because pipeline packages are valid Python packages, you can add them to your
|
Because pipeline packages are valid Python packages, you can add them to your
|
||||||
application's `requirements.txt`. If you're running your own internal PyPi
|
application's `requirements.txt`. If you're running your own internal PyPi
|
||||||
installation, you can upload the pipeline packages there. pip's
|
installation, you can upload the pipeline packages there. pip's
|
||||||
[requirements file format](https://pip.pypa.io/en/latest/reference/pip_install/#requirements-file-format)
|
[requirements file format](https://pip.pypa.io/en/latest/reference/requirements-file-format/)
|
||||||
supports both package names to download via a PyPi server, as well as direct
|
supports both package names to download via a PyPi server, as well as
|
||||||
URLs.
|
[direct URLs](#pipeline-urls).
|
||||||
|
|
||||||
```text
|
```text
|
||||||
### requirements.txt
|
### requirements.txt
|
||||||
spacy>=3.0.0,<4.0.0
|
spacy>=3.0.0,<4.0.0
|
||||||
https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz#egg=en_core_web_sm
|
en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.4.0/en_core_web_sm-3.4.0-py3-none-any.whl
|
||||||
```
|
```
|
||||||
|
|
||||||
Specifying `#egg=` with the package name tells pip which package to expect from
|
|
||||||
the download URL. This way, the package won't be re-downloaded and overwritten
|
|
||||||
if it's already installed - just like when you're downloading a package from
|
|
||||||
PyPi.
|
|
||||||
|
|
||||||
All pipeline packages are versioned and specify their spaCy dependency. This
|
All pipeline packages are versioned and specify their spaCy dependency. This
|
||||||
ensures cross-compatibility and lets you specify exact version requirements for
|
ensures cross-compatibility and lets you specify exact version requirements for
|
||||||
each pipeline. If you've [trained](/usage/training) your own pipeline, you can
|
each pipeline. If you've [trained](/usage/training) your own pipeline, you can
|
||||||
|
|
|
@ -76,6 +76,7 @@ const MODEL_META = {
|
||||||
benchmark_ner: 'NER accuracy',
|
benchmark_ner: 'NER accuracy',
|
||||||
benchmark_speed: 'Speed',
|
benchmark_speed: 'Speed',
|
||||||
compat: 'Latest compatible package version for your spaCy installation',
|
compat: 'Latest compatible package version for your spaCy installation',
|
||||||
|
download_link: 'Download link for the pipeline',
|
||||||
}
|
}
|
||||||
|
|
||||||
const LABEL_SCHEME_META = {
|
const LABEL_SCHEME_META = {
|
||||||
|
@ -138,6 +139,13 @@ function formatAccuracy(data, lang) {
|
||||||
.filter(item => item)
|
.filter(item => item)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function formatDownloadLink(lang, name, version) {
|
||||||
|
const fullName = `${lang}_${name}-${version}`
|
||||||
|
const filename = `${fullName}-py3-none-any.whl`
|
||||||
|
const url = `https://github.com/explosion/spacy-models/releases/download/${fullName}/${filename}`
|
||||||
|
return <Link to={url} hideIcon>{filename}</Link>
|
||||||
|
}
|
||||||
|
|
||||||
function formatModelMeta(data) {
|
function formatModelMeta(data) {
|
||||||
return {
|
return {
|
||||||
fullName: `${data.lang}_${data.name}-${data.version}`,
|
fullName: `${data.lang}_${data.name}-${data.version}`,
|
||||||
|
@ -154,6 +162,7 @@ function formatModelMeta(data) {
|
||||||
labels: isEmptyObj(data.labels) ? null : data.labels,
|
labels: isEmptyObj(data.labels) ? null : data.labels,
|
||||||
vectors: formatVectors(data.vectors),
|
vectors: formatVectors(data.vectors),
|
||||||
accuracy: formatAccuracy(data.performance, data.lang),
|
accuracy: formatAccuracy(data.performance, data.lang),
|
||||||
|
download_link: formatDownloadLink(data.lang, data.name, data.version),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -244,6 +253,7 @@ const Model = ({
|
||||||
{ label: 'Components', content: components, help: MODEL_META.components },
|
{ label: 'Components', content: components, help: MODEL_META.components },
|
||||||
{ label: 'Pipeline', content: pipeline, help: MODEL_META.pipeline },
|
{ label: 'Pipeline', content: pipeline, help: MODEL_META.pipeline },
|
||||||
{ label: 'Vectors', content: meta.vectors, help: MODEL_META.vecs },
|
{ label: 'Vectors', content: meta.vectors, help: MODEL_META.vecs },
|
||||||
|
{ label: 'Download Link', content: meta.download_link, help: MODEL_META.download_link },
|
||||||
{ label: 'Sources', content: sources, help: MODEL_META.sources },
|
{ label: 'Sources', content: sources, help: MODEL_META.sources },
|
||||||
{ label: 'Author', content: author },
|
{ label: 'Author', content: author },
|
||||||
{ label: 'License', content: license },
|
{ label: 'License', content: license },
|
||||||
|
|
Loading…
Reference in New Issue
Block a user