mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-30 07:27:28 +03:00 
			
		
		
		
	Merge pull request #8465 from explosion/feature/spacy-package-readme
This commit is contained in:
		
						commit
						a8e8d02ba7
					
				|  | @ -1,7 +1,7 @@ | |||
| from typing import Optional, Union, Any, Dict, List, Tuple | ||||
| import shutil | ||||
| from pathlib import Path | ||||
| from wasabi import Printer, get_raw_input | ||||
| from wasabi import Printer, MarkdownRenderer, get_raw_input | ||||
| import srsly | ||||
| import sys | ||||
| 
 | ||||
|  | @ -134,6 +134,11 @@ def package( | |||
|         file_path = package_path / model_name_v / file_name | ||||
|         if file_path.exists(): | ||||
|             shutil.copy(str(file_path), str(main_path)) | ||||
|     readme_path = main_path / "README.md" | ||||
|     if not readme_path.exists(): | ||||
|         readme = generate_readme(meta) | ||||
|         create_file(readme_path, readme) | ||||
|         create_file(package_path / model_name_v / "README.md", readme) | ||||
|     imports = [] | ||||
|     for code_path in code_paths: | ||||
|         imports.append(code_path.stem) | ||||
|  | @ -234,6 +239,113 @@ def generate_meta(existing_meta: Dict[str, Any], msg: Printer) -> Dict[str, Any] | |||
|     return meta | ||||
| 
 | ||||
| 
 | ||||
def generate_readme(meta: Dict[str, Any]) -> str:
    """
    Generate a Markdown-formatted README text from a model meta.json. Used
    within the GitHub release notes and as content for README.md file added
    to model packages.

    meta (Dict[str, Any]): The pipeline meta. Must contain the keys "lang",
        "name", "version" and "spacy_version"; every other key is optional.
    RETURNS (str): The rendered Markdown text.
    RAISES (KeyError): If one of the required meta keys is missing.
    """
    md = MarkdownRenderer()
    lang = meta["lang"]
    name = f"{lang}_{meta['name']}"
    version = meta["version"]
    # Use "or" fallbacks so keys that are present but null behave like
    # missing keys instead of raising on iteration / attribute access.
    pipeline = ", ".join([md.code(p) for p in meta.get("pipeline") or []])
    components = ", ".join([md.code(p) for p in meta.get("components") or []])
    vecs = meta.get("vectors") or {}
    vectors = (
        f"{vecs.get('keys', 0)} keys, {vecs.get('vectors', 0)} unique vectors "
        f"({vecs.get('width', 0)} dimensions)"
    )
    author = meta.get("author") or "n/a"
    # Only link the author if the URL is non-empty: an empty "url" entry
    # would otherwise render as a dead "[author]()" link.
    url = meta.get("url")
    notes = meta.get("notes", "")
    license_name = meta.get("license")
    sources = _format_sources(meta.get("sources"))
    description = meta.get("description")
    label_scheme = _format_label_scheme(meta.get("labels"))
    accuracy = _format_accuracy(meta.get("performance"))
    table_data = [
        (md.bold("Name"), md.code(name)),
        (md.bold("Version"), md.code(version)),
        (md.bold("spaCy"), md.code(meta["spacy_version"])),
        (md.bold("Default Pipeline"), pipeline),
        (md.bold("Components"), components),
        (md.bold("Vectors"), vectors),
        (md.bold("Sources"), sources or "n/a"),
        (md.bold("License"), md.code(license_name) if license_name else "n/a"),
        (md.bold("Author"), md.link(author, url) if url else author),
    ]
    # Put together Markdown body: optional sections are only added if they
    # have content, the feature table is always present.
    if description:
        md.add(description)
    md.add(md.table(table_data, ["Feature", "Description"]))
    if label_scheme:
        md.add(md.title(3, "Label Scheme"))
        md.add(label_scheme)
    if accuracy:
        md.add(md.title(3, "Accuracy"))
        md.add(accuracy)
    if notes:
        md.add(notes)
    return md.text
| 
 | ||||
| 
 | ||||
| def _format_sources(data: Any) -> str: | ||||
|     if not data or not isinstance(data, list): | ||||
|         return "n/a" | ||||
|     sources = [] | ||||
|     for source in data: | ||||
|         if not isinstance(source, dict): | ||||
|             source = {"name": source} | ||||
|         name = source.get("name") | ||||
|         if not name: | ||||
|             continue | ||||
|         url = source.get("url") | ||||
|         author = source.get("author") | ||||
|         result = name if not url else "[{}]({})".format(name, url) | ||||
|         if author: | ||||
|             result += " ({})".format(author) | ||||
|         sources.append(result) | ||||
|     return "<br />".join(sources) | ||||
| 
 | ||||
| 
 | ||||
| def _format_accuracy(data: Dict[str, Any], exclude: List[str] = ["speed"]) -> str: | ||||
|     if not data: | ||||
|         return "" | ||||
|     md = MarkdownRenderer() | ||||
|     scalars = [(k, v) for k, v in data.items() if isinstance(v, (int, float))] | ||||
|     scores = [ | ||||
|         (md.code(acc.upper()), f"{score*100:.2f}") | ||||
|         for acc, score in scalars | ||||
|         if acc not in exclude | ||||
|     ] | ||||
|     md.add(md.table(scores, ["Type", "Score"])) | ||||
|     return md.text | ||||
| 
 | ||||
| 
 | ||||
| def _format_label_scheme(data: Dict[str, Any]) -> str: | ||||
|     if not data: | ||||
|         return "" | ||||
|     md = MarkdownRenderer() | ||||
|     n_labels = 0 | ||||
|     n_pipes = 0 | ||||
|     label_data = [] | ||||
|     for pipe, labels in data.items(): | ||||
|         if not labels: | ||||
|             continue | ||||
|         col1 = md.bold(md.code(pipe)) | ||||
|         col2 = ", ".join( | ||||
|             [md.code(label.replace("|", "\|")) for label in labels] | ||||
|         )  # noqa: W605 | ||||
|         label_data.append((col1, col2)) | ||||
|         n_labels += len(labels) | ||||
|         n_pipes += 1 | ||||
|     if not label_data: | ||||
|         return "" | ||||
|     label_info = f"View label scheme ({n_labels} labels for {n_pipes} components)" | ||||
|     md.add("<details>") | ||||
|     md.add(f"<summary>{label_info}</summary>") | ||||
|     md.add(md.table(label_data, ["Component", "Labels"])) | ||||
|     md.add("</details>") | ||||
|     return md.text | ||||
| 
 | ||||
| 
 | ||||
| TEMPLATE_SETUP = """ | ||||
| #!/usr/bin/env python | ||||
| import io | ||||
|  |  | |||
|  | @ -933,7 +933,10 @@ copied into the package and imported in the `__init__.py`. If the path to a | |||
| [`meta.json`](/api/data-formats#meta) is supplied, or a `meta.json` is found in | ||||
| the input directory, this file is used. Otherwise, the data can be entered | ||||
| directly from the command line. spaCy will then create a build artifact that you | ||||
| can distribute and install with `pip install`. | ||||
| can distribute and install with `pip install`. As of v3.1, the `package` command | ||||
| will also create a formatted `README.md` based on the pipeline information | ||||
| defined in the `meta.json`. If a `README.md` is already present in the source | ||||
| directory, it will be used instead. | ||||
| 
 | ||||
| <Infobox title="New in v3.0" variant="warning"> | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user