spaCy/spacy/cli/info.py
Raphael Mitsch 749e446ee3 Merge branch 'master' into sync/master-into-v4
# Conflicts:
#	.github/azure-steps.yml
2023-03-06 16:27:56 +01:00

191 lines
6.2 KiB
Python

from typing import Optional, Dict, Any, Union, List
import platform
import json
from pathlib import Path
from wasabi import Printer, MarkdownRenderer
import srsly
import importlib.metadata
from ._util import app, Arg, Opt, string_to_list
from .download import get_model_filename, get_latest_version
from .. import util
from .. import about
@app.command("info")
def info_cli(
# fmt: off
model: Optional[str] = Arg(None, help="Optional loadable spaCy pipeline"),
markdown: bool = Opt(False, "--markdown", "-md", help="Generate Markdown for GitHub issues"),
silent: bool = Opt(False, "--silent", "-s", "-S", help="Don't print anything (just return)"),
exclude: str = Opt("labels", "--exclude", "-e", help="Comma-separated keys to exclude from the print-out"),
url: bool = Opt(False, "--url", "-u", help="Print the URL to download the most recent compatible version of the pipeline"),
# fmt: on
):
"""
Print info about spaCy installation. If a pipeline is specified as an argument,
print its meta information. Flag --markdown prints details in Markdown for easy
copy-pasting to GitHub issues.
Flag --url prints only the download URL of the most recent compatible
version of the pipeline.
DOCS: https://spacy.io/api/cli#info
"""
exclude = string_to_list(exclude)
info(
model,
markdown=markdown,
silent=silent,
exclude=exclude,
url=url,
)
def info(
model: Optional[str] = None,
*,
markdown: bool = False,
silent: bool = True,
exclude: Optional[List[str]] = None,
url: bool = False,
) -> Union[str, dict]:
msg = Printer(no_print=silent, pretty=not silent)
if not exclude:
exclude = []
if url:
if model is not None:
title = f"Download info for pipeline '{model}'"
data = info_model_url(model)
print(data["download_url"])
return data
else:
msg.fail("--url option requires a pipeline name", exits=1)
elif model:
title = f"Info about pipeline '{model}'"
data = info_model(model, silent=silent)
else:
title = "Info about spaCy"
data = info_spacy()
raw_data = {k.lower().replace(" ", "_"): v for k, v in data.items()}
if "Pipelines" in data and isinstance(data["Pipelines"], dict):
data["Pipelines"] = ", ".join(
f"{n} ({v})" for n, v in data["Pipelines"].items()
)
markdown_data = get_markdown(data, title=title, exclude=exclude)
if markdown:
if not silent:
print(markdown_data)
return markdown_data
if not silent:
table_data = {k: v for k, v in data.items() if k not in exclude}
msg.table(table_data, title=title)
return raw_data
def info_spacy() -> Dict[str, Any]:
"""Generate info about the current spaCy intallation.
RETURNS (dict): The spaCy info.
"""
all_models = {}
for pkg_name in util.get_installed_models():
package = pkg_name.replace("-", "_")
all_models[package] = util.get_package_version(pkg_name)
return {
"spaCy version": about.__version__,
"Location": str(Path(__file__).parent.parent),
"Platform": platform.platform(),
"Python version": platform.python_version(),
"Pipelines": all_models,
}
def info_model(model: str, *, silent: bool = True) -> Dict[str, Any]:
"""Generate info about a specific model.
model (str): Model name of path.
silent (bool): Don't print anything, just return.
RETURNS (dict): The model meta.
"""
msg = Printer(no_print=silent, pretty=not silent)
if util.is_package(model):
model_path = util.get_package_path(model)
else:
model_path = Path(model)
meta_path = model_path / "meta.json"
if not meta_path.is_file():
msg.fail("Can't find pipeline meta.json", meta_path, exits=1)
meta = srsly.read_json(meta_path)
if model_path.resolve() != model_path:
meta["source"] = str(model_path.resolve())
else:
meta["source"] = str(model_path)
download_url = info_installed_model_url(model)
if download_url:
meta["download_url"] = download_url
return {
k: v for k, v in meta.items() if k not in ("accuracy", "performance", "speed")
}
def info_installed_model_url(model: str) -> Optional[str]:
"""Given a pipeline name, get the download URL if available, otherwise
return None.
This is only available for pipelines installed as modules that have
dist-info available.
"""
try:
dist = importlib.metadata.distribution(model)
text = dist.read_text("direct_url.json")
if isinstance(text, str):
data = json.loads(text)
return data["url"]
except Exception:
pass
return None
def info_model_url(model: str) -> Dict[str, Any]:
"""Return the download URL for the latest version of a pipeline."""
version = get_latest_version(model)
filename = get_model_filename(model, version)
download_url = about.__download_url__ + "/" + filename
release_tpl = "https://github.com/explosion/spacy-models/releases/tag/{m}-{v}"
release_url = release_tpl.format(m=model, v=version)
return {"download_url": download_url, "release_url": release_url}
def get_markdown(
data: Dict[str, Any],
title: Optional[str] = None,
exclude: Optional[List[str]] = None,
) -> str:
"""Get data in GitHub-flavoured Markdown format for issues etc.
data (Dict[str, Any]): Label/value pairs.
title (str): Optional title, will be rendered as headline 2.
exclude (List[str]): Names of keys to exclude.
RETURNS (str): The Markdown string.
"""
md = MarkdownRenderer()
if title:
md.add(md.title(2, title))
items = []
for key, value in data.items():
if exclude and key in exclude:
continue
if isinstance(value, str):
try:
existing_path = Path(value).exists()
except Exception:
# invalid Path, like a URL string
existing_path = False
if existing_path:
continue
items.append(f"{md.bold(f'{key}:')} {value}")
md.add(md.list(items))
return f"\n{md.text}\n"