2020-12-30 14:05:58 +03:00
|
|
|
from typing import Optional, Dict, Any, Union, List
|
2017-03-18 15:01:16 +03:00
|
|
|
import platform
|
|
|
|
from pathlib import Path
|
2020-08-26 16:33:11 +03:00
|
|
|
from wasabi import Printer, MarkdownRenderer
|
💫 Replace ujson, msgpack and dill/pickle/cloudpickle with srsly (#3003)
Remove hacks and wrappers, keep code in sync across our libraries and move spaCy a few steps closer to only depending on packages with binary wheels 🎉
See here: https://github.com/explosion/srsly
Serialization is hard, especially across Python versions and multiple platforms. After dealing with many subtle bugs over the years (encodings, locales, large files) our libraries like spaCy and Prodigy have steadily grown a number of utility functions to wrap the multiple serialization formats we need to support (especially json, msgpack and pickle). These wrapping functions ended up duplicated across our codebases, so we wanted to put them in one place.
At the same time, we noticed that having a lot of small dependencies was making maintenance harder, and making installation slower. To solve this, we've made srsly standalone, by including the component packages directly within it. This way we can provide all the serialization utilities we need in a single binary wheel.
srsly currently includes forks of the following packages:
ujson
msgpack
msgpack-numpy
cloudpickle
* WIP: replace json/ujson with srsly
* Replace ujson in examples
Use regular json instead of srsly to make code easier to read and follow
* Update requirements
* Fix imports
* Fix typos
* Replace msgpack with srsly
* Fix warning
2018-12-03 03:28:22 +03:00
|
|
|
import srsly
|
2017-03-18 20:57:45 +03:00
|
|
|
|
2020-12-30 14:05:58 +03:00
|
|
|
from ._util import app, Arg, Opt, string_to_list
|
2017-03-18 17:14:48 +03:00
|
|
|
from .. import util
|
2018-04-03 16:50:31 +03:00
|
|
|
from .. import about
|
2017-03-18 15:01:16 +03:00
|
|
|
|
|
|
|
|
2020-06-21 14:44:00 +03:00
|
|
|
@app.command("info")
def info_cli(
    # fmt: off
    model: Optional[str] = Arg(None, help="Optional loadable spaCy pipeline"),
    markdown: bool = Opt(False, "--markdown", "-md", help="Generate Markdown for GitHub issues"),
    silent: bool = Opt(False, "--silent", "-s", "-S", help="Don't print anything (just return)"),
    exclude: Optional[str] = Opt("labels", "--exclude", "-e", help="Comma-separated keys to exclude from the print-out"),
    # fmt: on
):
    """
    Print info about spaCy installation. If a pipeline is specified as an argument,
    print its meta information. Flag --markdown prints details in Markdown for easy
    copy-pasting to GitHub issues.

    DOCS: https://nightly.spacy.io/api/cli#info
    """
    # Keep the raw CLI string and the parsed list under separate names so the
    # parameter's declared type (Optional[str]) stays accurate.
    excluded_keys = string_to_list(exclude)
    info(model, markdown=markdown, silent=silent, exclude=excluded_keys)
|
2020-06-21 22:35:01 +03:00
|
|
|
|
|
|
|
|
|
|
|
def info(
    model: Optional[str] = None,
    *,
    markdown: bool = False,
    silent: bool = True,
    exclude: Optional[List[str]] = None,
) -> Union[str, dict]:
    """Collect info about the spaCy installation or about a single pipeline.

    model (Optional[str]): Pipeline name or path. If not set, info about the
        spaCy installation is collected instead.
    markdown (bool): Return (and, unless silent, print) the info as Markdown.
    silent (bool): Don't print anything, just return.
    exclude (Optional[List[str]]): Keys to leave out of the printed table and
        of the Markdown output. The returned dict is not filtered.
    RETURNS (Union[str, dict]): The Markdown string if markdown is set,
        otherwise the info dict with lowercased, underscore-separated keys.
    """
    msg = Printer(no_print=silent, pretty=not silent)
    # Callers that don't care about exclusions can omit the argument (or pass
    # None) without hitting a TypeError further down.
    if exclude is None:
        exclude = []
    if model:
        title = f"Info about pipeline '{model}'"
        data = info_model(model, silent=silent)
    else:
        title = "Info about spaCy"
        data = info_spacy()
    # Snapshot with normalized keys *before* "Pipelines" is flattened below,
    # so the returned dict keeps the original name -> version mapping.
    raw_data = {k.lower().replace(" ", "_"): v for k, v in data.items()}
    if "Pipelines" in data and isinstance(data["Pipelines"], dict):
        data["Pipelines"] = ", ".join(
            f"{n} ({v})" for n, v in data["Pipelines"].items()
        )
    if markdown:
        # Only build the Markdown when it was actually requested.
        markdown_data = get_markdown(data, title=title, exclude=exclude)
        if not silent:
            print(markdown_data)
        return markdown_data
    if not silent:
        table_data = {k: v for k, v in data.items() if k not in exclude}
        msg.table(table_data, title=title)
    return raw_data
|
2020-06-21 22:35:01 +03:00
|
|
|
|
|
|
|
|
2020-06-22 02:17:11 +03:00
|
|
|
def info_spacy() -> Dict[str, Any]:
    """Generate info about the current spaCy installation.

    RETURNS (Dict[str, Any]): The spaCy info: version, install location,
        platform, Python version and the installed pipeline packages.
    """
    # Keys use underscores in place of dashes; the version lookup still uses
    # the name exactly as reported by util.get_installed_models().
    all_models = {
        pkg_name.replace("-", "_"): util.get_package_version(pkg_name)
        for pkg_name in util.get_installed_models()
    }
    return {
        "spaCy version": about.__version__,
        # Two levels up from this file
        "Location": str(Path(__file__).parent.parent),
        "Platform": platform.platform(),
        "Python version": platform.python_version(),
        "Pipelines": all_models,
    }
|
2017-03-18 15:01:16 +03:00
|
|
|
|
|
|
|
|
2020-06-21 22:35:01 +03:00
|
|
|
def info_model(model: str, *, silent: bool = True) -> Dict[str, Any]:
    """Generate info about a specific model.

    model (str): Model name or path.
    silent (bool): Don't print anything, just return.
    RETURNS (dict): The model meta read from meta.json, with a "source" entry
        added and the "accuracy", "performance" and "speed" entries removed.
    """
    msg = Printer(no_print=silent, pretty=not silent)
    # Installed packages are looked up by name; anything else is treated as a
    # filesystem path.
    model_path = util.get_package_path(model) if util.is_package(model) else Path(model)
    meta_path = model_path / "meta.json"
    if not meta_path.is_file():
        msg.fail("Can't find pipeline meta.json", meta_path, exits=1)
    meta = srsly.read_json(meta_path)
    # Report the resolved location whenever it differs from the given path.
    resolved_path = model_path.resolve()
    source_path = model_path if resolved_path == model_path else resolved_path
    meta["source"] = str(source_path)
    hidden_keys = ("accuracy", "performance", "speed")
    return {key: value for key, value in meta.items() if key not in hidden_keys}
|
2020-06-21 22:35:01 +03:00
|
|
|
|
|
|
|
|
2020-12-30 14:05:58 +03:00
|
|
|
def get_markdown(
    data: Dict[str, Any],
    title: Optional[str] = None,
    exclude: Optional[List[str]] = None,
) -> str:
    """Get data in GitHub-flavoured Markdown format for issues etc.

    data (Dict[str, Any]): Label/value pairs.
    title (str / None): Title, will be rendered as headline 2.
    exclude (Optional[List[str]]): Keys to leave out of the output.
    RETURNS (str): The Markdown string.
    """
    md = MarkdownRenderer()
    if title:
        md.add(md.title(2, title))
    items = []
    for key, value in data.items():
        if exclude and key in exclude:
            continue
        if isinstance(value, str):
            try:
                existing_path = Path(value).exists()
            except (OSError, ValueError):
                # Not usable as a path (e.g. a URL string or an embedded null
                # byte). Only filesystem/value errors are swallowed so that
                # KeyboardInterrupt and SystemExit still propagate.
                existing_path = False
            if existing_path:
                # Values that are existing local paths are omitted
                # (presumably to keep them out of pasted issue reports).
                continue
        items.append(f"{md.bold(f'{key}:')} {value}")
    md.add(md.list(items))
    return f"\n{md.text}\n"
|