import importlib.metadata import json import platform from pathlib import Path from typing import Any, Dict, List, Optional, Union import srsly from wasabi import MarkdownRenderer, Printer from .. import about, util from ._util import Arg, Opt, app, string_to_list from .download import get_latest_version, get_model_filename @app.command("info") def info_cli( # fmt: off model: Optional[str] = Arg(None, help="Optional loadable spaCy pipeline"), markdown: bool = Opt(False, "--markdown", "-md", help="Generate Markdown for GitHub issues"), silent: bool = Opt(False, "--silent", "-s", "-S", help="Don't print anything (just return)"), exclude: str = Opt("labels", "--exclude", "-e", help="Comma-separated keys to exclude from the print-out"), url: bool = Opt(False, "--url", "-u", help="Print the URL to download the most recent compatible version of the pipeline"), # fmt: on ): """ Print info about spaCy installation. If a pipeline is specified as an argument, print its meta information. Flag --markdown prints details in Markdown for easy copy-pasting to GitHub issues. Flag --url prints only the download URL of the most recent compatible version of the pipeline. DOCS: https://spacy.io/api/cli#info """ exclude = string_to_list(exclude) info( model, markdown=markdown, silent=silent, exclude=exclude, url=url, ) def info( model: Optional[str] = None, *, markdown: bool = False, silent: bool = True, exclude: Optional[List[str]] = None, url: bool = False, ) -> Union[str, dict]: msg = Printer(no_print=silent, pretty=not silent) if not exclude: exclude = [] if url: if model is not None: title = f"Download info for pipeline '{model}'" data = info_model_url(model) print(data["download_url"]) return data else: msg.fail("--url option requires a pipeline name", exits=1) elif model: title = f"Info about pipeline '{model}'" data = info_model(model, silent=silent) else: title = "Info about spaCy" data = info_spacy() raw_data = {k.lower().replace(" ", "_"): v for k, v in data.items()} if "Pipelines" in data and isinstance(data["Pipelines"], dict): data["Pipelines"] = ", ".join( f"{n} ({v})" for n, v in data["Pipelines"].items() ) markdown_data = get_markdown(data, title=title, exclude=exclude) if markdown: if not silent: print(markdown_data) return markdown_data if not silent: table_data = {k: v for k, v in data.items() if k not in exclude} msg.table(table_data, title=title) return raw_data def info_spacy() -> Dict[str, Any]: """Generate info about the current spaCy intallation. RETURNS (dict): The spaCy info. """ all_models = {} for pkg_name in util.get_installed_models(): package = pkg_name.replace("-", "_") all_models[package] = util.get_package_version(pkg_name) return { "spaCy version": about.__version__, "Location": str(Path(__file__).parent.parent), "Platform": platform.platform(), "Python version": platform.python_version(), "Pipelines": all_models, } def info_model(model: str, *, silent: bool = True) -> Dict[str, Any]: """Generate info about a specific model. model (str): Model name of path. silent (bool): Don't print anything, just return. RETURNS (dict): The model meta. """ msg = Printer(no_print=silent, pretty=not silent) if util.is_package(model): model_path = util.get_package_path(model) else: model_path = Path(model) meta_path = model_path / "meta.json" if not meta_path.is_file(): msg.fail("Can't find pipeline meta.json", meta_path, exits=1) meta = srsly.read_json(meta_path) if model_path.resolve() != model_path: meta["source"] = str(model_path.resolve()) else: meta["source"] = str(model_path) download_url = info_installed_model_url(model) if download_url: meta["download_url"] = download_url return { k: v for k, v in meta.items() if k not in ("accuracy", "performance", "speed") } def info_installed_model_url(model: str) -> Optional[str]: """Given a pipeline name, get the download URL if available, otherwise return None. This is only available for pipelines installed as modules that have dist-info available. """ try: dist = importlib.metadata.distribution(model) text = dist.read_text("direct_url.json") if isinstance(text, str): data = json.loads(text) return data["url"] except Exception: pass return None def info_model_url(model: str) -> Dict[str, Any]: """Return the download URL for the latest version of a pipeline.""" version = get_latest_version(model) filename = get_model_filename(model, version) download_url = about.__download_url__ + "/" + filename release_tpl = "https://github.com/explosion/spacy-models/releases/tag/{m}-{v}" release_url = release_tpl.format(m=model, v=version) return {"download_url": download_url, "release_url": release_url} def get_markdown( data: Dict[str, Any], title: Optional[str] = None, exclude: Optional[List[str]] = None, ) -> str: """Get data in GitHub-flavoured Markdown format for issues etc. data (Dict[str, Any]): Label/value pairs. title (str): Optional title, will be rendered as headline 2. exclude (List[str]): Names of keys to exclude. RETURNS (str): The Markdown string. """ md = MarkdownRenderer() if title: md.add(md.title(2, title)) items = [] for key, value in data.items(): if exclude and key in exclude: continue if isinstance(value, str): try: existing_path = Path(value).exists() except Exception: # invalid Path, like a URL string existing_path = False if existing_path: continue items.append(f"{md.bold(f'{key}:')} {value}") md.add(md.list(items)) return f"\n{md.text}\n"