Merge pull request #8465 from explosion/feature/spacy-package-readme

This commit is contained in:
Ines Montani 2021-06-24 13:11:08 +10:00 committed by GitHub
commit a8e8d02ba7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 117 additions and 2 deletions

View File

@ -1,7 +1,7 @@
from typing import Optional, Union, Any, Dict, List, Tuple from typing import Optional, Union, Any, Dict, List, Tuple
import shutil import shutil
from pathlib import Path from pathlib import Path
from wasabi import Printer, get_raw_input from wasabi import Printer, MarkdownRenderer, get_raw_input
import srsly import srsly
import sys import sys
@ -134,6 +134,11 @@ def package(
file_path = package_path / model_name_v / file_name file_path = package_path / model_name_v / file_name
if file_path.exists(): if file_path.exists():
shutil.copy(str(file_path), str(main_path)) shutil.copy(str(file_path), str(main_path))
readme_path = main_path / "README.md"
if not readme_path.exists():
readme = generate_readme(meta)
create_file(readme_path, readme)
create_file(package_path / model_name_v / "README.md", readme)
imports = [] imports = []
for code_path in code_paths: for code_path in code_paths:
imports.append(code_path.stem) imports.append(code_path.stem)
@ -234,6 +239,113 @@ def generate_meta(existing_meta: Dict[str, Any], msg: Printer) -> Dict[str, Any]
return meta return meta
def generate_readme(meta: Dict[str, Any]) -> str:
"""
Generate a Markdown-formatted README text from a model meta.json. Used
within the GitHub release notes and as content for README.md file added
to model packages.
"""
md = MarkdownRenderer()
lang = meta["lang"]
name = f"{lang}_{meta['name']}"
version = meta["version"]
pipeline = ", ".join([md.code(p) for p in meta.get("pipeline", [])])
components = ", ".join([md.code(p) for p in meta.get("components", [])])
vecs = meta.get("vectors", {})
vectors = f"{vecs.get('keys', 0)} keys, {vecs.get('vectors', 0)} unique vectors ({ vecs.get('width', 0)} dimensions)"
author = meta.get("author") or "n/a"
notes = meta.get("notes", "")
license_name = meta.get("license")
sources = _format_sources(meta.get("sources"))
description = meta.get("description")
label_scheme = _format_label_scheme(meta.get("labels"))
accuracy = _format_accuracy(meta.get("performance"))
table_data = [
(md.bold("Name"), md.code(name)),
(md.bold("Version"), md.code(version)),
(md.bold("spaCy"), md.code(meta["spacy_version"])),
(md.bold("Default Pipeline"), pipeline),
(md.bold("Components"), components),
(md.bold("Vectors"), vectors),
(md.bold("Sources"), sources or "n/a"),
(md.bold("License"), md.code(license_name) if license_name else "n/a"),
(md.bold("Author"), md.link(author, meta["url"]) if "url" in meta else author),
]
# Put together Markdown body
if description:
md.add(description)
md.add(md.table(table_data, ["Feature", "Description"]))
if label_scheme:
md.add(md.title(3, "Label Scheme"))
md.add(label_scheme)
if accuracy:
md.add(md.title(3, "Accuracy"))
md.add(accuracy)
if notes:
md.add(notes)
return md.text
def _format_sources(data: Any) -> str:
if not data or not isinstance(data, list):
return "n/a"
sources = []
for source in data:
if not isinstance(source, dict):
source = {"name": source}
name = source.get("name")
if not name:
continue
url = source.get("url")
author = source.get("author")
result = name if not url else "[{}]({})".format(name, url)
if author:
result += " ({})".format(author)
sources.append(result)
return "<br />".join(sources)
def _format_accuracy(data: Dict[str, Any], exclude: List[str] = ["speed"]) -> str:
if not data:
return ""
md = MarkdownRenderer()
scalars = [(k, v) for k, v in data.items() if isinstance(v, (int, float))]
scores = [
(md.code(acc.upper()), f"{score*100:.2f}")
for acc, score in scalars
if acc not in exclude
]
md.add(md.table(scores, ["Type", "Score"]))
return md.text
def _format_label_scheme(data: Dict[str, Any]) -> str:
if not data:
return ""
md = MarkdownRenderer()
n_labels = 0
n_pipes = 0
label_data = []
for pipe, labels in data.items():
if not labels:
continue
col1 = md.bold(md.code(pipe))
col2 = ", ".join(
[md.code(label.replace("|", "\|")) for label in labels]
) # noqa: W605
label_data.append((col1, col2))
n_labels += len(labels)
n_pipes += 1
if not label_data:
return ""
label_info = f"View label scheme ({n_labels} labels for {n_pipes} components)"
md.add("<details>")
md.add(f"<summary>{label_info}</summary>")
md.add(md.table(label_data, ["Component", "Labels"]))
md.add("</details>")
return md.text
TEMPLATE_SETUP = """ TEMPLATE_SETUP = """
#!/usr/bin/env python #!/usr/bin/env python
import io import io

View File

@ -933,7 +933,10 @@ copied into the package and imported in the `__init__.py`. If the path to a
[`meta.json`](/api/data-formats#meta) is supplied, or a `meta.json` is found in [`meta.json`](/api/data-formats#meta) is supplied, or a `meta.json` is found in
the input directory, this file is used. Otherwise, the data can be entered the input directory, this file is used. Otherwise, the data can be entered
directly from the command line. spaCy will then create a build artifact that you directly from the command line. spaCy will then create a build artifact that you
can distribute and install with `pip install`. can distribute and install with `pip install`. As of v3.1, the `package` command
will also create a formatted `README.md` based on the pipeline information
defined in the `meta.json`. If a `README.md` is already present in the source
directory, it will be used instead.
<Infobox title="New in v3.0" variant="warning"> <Infobox title="New in v3.0" variant="warning">