mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 07:57:35 +03:00 
			
		
		
		
	Merge pull request #8465 from explosion/feature/spacy-package-readme
This commit is contained in:
		
						commit
						a8e8d02ba7
					
				|  | @ -1,7 +1,7 @@ | ||||||
| from typing import Optional, Union, Any, Dict, List, Tuple | from typing import Optional, Union, Any, Dict, List, Tuple | ||||||
| import shutil | import shutil | ||||||
| from pathlib import Path | from pathlib import Path | ||||||
| from wasabi import Printer, get_raw_input | from wasabi import Printer, MarkdownRenderer, get_raw_input | ||||||
| import srsly | import srsly | ||||||
| import sys | import sys | ||||||
| 
 | 
 | ||||||
|  | @ -134,6 +134,11 @@ def package( | ||||||
|         file_path = package_path / model_name_v / file_name |         file_path = package_path / model_name_v / file_name | ||||||
|         if file_path.exists(): |         if file_path.exists(): | ||||||
|             shutil.copy(str(file_path), str(main_path)) |             shutil.copy(str(file_path), str(main_path)) | ||||||
|  |     readme_path = main_path / "README.md" | ||||||
|  |     if not readme_path.exists(): | ||||||
|  |         readme = generate_readme(meta) | ||||||
|  |         create_file(readme_path, readme) | ||||||
|  |         create_file(package_path / model_name_v / "README.md", readme) | ||||||
|     imports = [] |     imports = [] | ||||||
|     for code_path in code_paths: |     for code_path in code_paths: | ||||||
|         imports.append(code_path.stem) |         imports.append(code_path.stem) | ||||||
|  | @ -234,6 +239,113 @@ def generate_meta(existing_meta: Dict[str, Any], msg: Printer) -> Dict[str, Any] | ||||||
|     return meta |     return meta | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
def generate_readme(meta: Dict[str, Any]) -> str:
    """
    Generate a Markdown-formatted README text from a model meta.json. Used
    within the GitHub release notes and as content for the README.md file
    added to model packages.

    The keys "lang", "name", "version" and "spacy_version" are required and
    raise a KeyError if missing. All other keys are optional and fall back to
    empty values or "n/a".

    meta: The contents of the pipeline's meta.json.
    Returns the rendered Markdown text.
    """
    md = MarkdownRenderer()
    lang = meta["lang"]
    name = f"{lang}_{meta['name']}"
    version = meta["version"]
    # Use "or" fallbacks so keys that are present but set to None don't crash
    pipeline = ", ".join([md.code(p) for p in meta.get("pipeline") or []])
    components = ", ".join([md.code(p) for p in meta.get("components") or []])
    vecs = meta.get("vectors") or {}
    vectors = (
        f"{vecs.get('keys', 0)} keys, {vecs.get('vectors', 0)} unique vectors "
        f"({vecs.get('width', 0)} dimensions)"
    )
    author = meta.get("author") or "n/a"
    # Only link the author if there is a non-empty URL to link to
    url = meta.get("url")
    notes = meta.get("notes", "")
    license_name = meta.get("license")
    sources = _format_sources(meta.get("sources"))
    description = meta.get("description")
    label_scheme = _format_label_scheme(meta.get("labels"))
    accuracy = _format_accuracy(meta.get("performance"))
    table_data = [
        (md.bold("Name"), md.code(name)),
        (md.bold("Version"), md.code(version)),
        (md.bold("spaCy"), md.code(meta["spacy_version"])),
        (md.bold("Default Pipeline"), pipeline),
        (md.bold("Components"), components),
        (md.bold("Vectors"), vectors),
        (md.bold("Sources"), sources or "n/a"),
        (md.bold("License"), md.code(license_name) if license_name else "n/a"),
        (md.bold("Author"), md.link(author, url) if url else author),
    ]
    # Put together Markdown body
    if description:
        md.add(description)
    md.add(md.table(table_data, ["Feature", "Description"]))
    if label_scheme:
        md.add(md.title(3, "Label Scheme"))
        md.add(label_scheme)
    if accuracy:
        md.add(md.title(3, "Accuracy"))
        md.add(accuracy)
    if notes:
        md.add(notes)
    return md.text
|  | 
 | ||||||
|  | 
 | ||||||
|  | def _format_sources(data: Any) -> str: | ||||||
|  |     if not data or not isinstance(data, list): | ||||||
|  |         return "n/a" | ||||||
|  |     sources = [] | ||||||
|  |     for source in data: | ||||||
|  |         if not isinstance(source, dict): | ||||||
|  |             source = {"name": source} | ||||||
|  |         name = source.get("name") | ||||||
|  |         if not name: | ||||||
|  |             continue | ||||||
|  |         url = source.get("url") | ||||||
|  |         author = source.get("author") | ||||||
|  |         result = name if not url else "[{}]({})".format(name, url) | ||||||
|  |         if author: | ||||||
|  |             result += " ({})".format(author) | ||||||
|  |         sources.append(result) | ||||||
|  |     return "<br />".join(sources) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def _format_accuracy(data: Dict[str, Any], exclude: List[str] = ["speed"]) -> str: | ||||||
|  |     if not data: | ||||||
|  |         return "" | ||||||
|  |     md = MarkdownRenderer() | ||||||
|  |     scalars = [(k, v) for k, v in data.items() if isinstance(v, (int, float))] | ||||||
|  |     scores = [ | ||||||
|  |         (md.code(acc.upper()), f"{score*100:.2f}") | ||||||
|  |         for acc, score in scalars | ||||||
|  |         if acc not in exclude | ||||||
|  |     ] | ||||||
|  |     md.add(md.table(scores, ["Type", "Score"])) | ||||||
|  |     return md.text | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def _format_label_scheme(data: Dict[str, Any]) -> str: | ||||||
|  |     if not data: | ||||||
|  |         return "" | ||||||
|  |     md = MarkdownRenderer() | ||||||
|  |     n_labels = 0 | ||||||
|  |     n_pipes = 0 | ||||||
|  |     label_data = [] | ||||||
|  |     for pipe, labels in data.items(): | ||||||
|  |         if not labels: | ||||||
|  |             continue | ||||||
|  |         col1 = md.bold(md.code(pipe)) | ||||||
|  |         col2 = ", ".join( | ||||||
|  |             [md.code(label.replace("|", "\|")) for label in labels] | ||||||
|  |         )  # noqa: W605 | ||||||
|  |         label_data.append((col1, col2)) | ||||||
|  |         n_labels += len(labels) | ||||||
|  |         n_pipes += 1 | ||||||
|  |     if not label_data: | ||||||
|  |         return "" | ||||||
|  |     label_info = f"View label scheme ({n_labels} labels for {n_pipes} components)" | ||||||
|  |     md.add("<details>") | ||||||
|  |     md.add(f"<summary>{label_info}</summary>") | ||||||
|  |     md.add(md.table(label_data, ["Component", "Labels"])) | ||||||
|  |     md.add("</details>") | ||||||
|  |     return md.text | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| TEMPLATE_SETUP = """ | TEMPLATE_SETUP = """ | ||||||
| #!/usr/bin/env python | #!/usr/bin/env python | ||||||
| import io | import io | ||||||
|  |  | ||||||
|  | @ -933,7 +933,10 @@ copied into the package and imported in the `__init__.py`. If the path to a | ||||||
| [`meta.json`](/api/data-formats#meta) is supplied, or a `meta.json` is found in | [`meta.json`](/api/data-formats#meta) is supplied, or a `meta.json` is found in | ||||||
| the input directory, this file is used. Otherwise, the data can be entered | the input directory, this file is used. Otherwise, the data can be entered | ||||||
| directly from the command line. spaCy will then create a build artifact that you | directly from the command line. spaCy will then create a build artifact that you | ||||||
| can distribute and install with `pip install`. | can distribute and install with `pip install`. As of v3.1, the `package` command | ||||||
|  | will also create a formatted `README.md` based on the pipeline information | ||||||
|  | defined in the `meta.json`. If a `README.md` is already present in the source | ||||||
|  | directory, it will be used instead. | ||||||
| 
 | 
 | ||||||
| <Infobox title="New in v3.0" variant="warning"> | <Infobox title="New in v3.0" variant="warning"> | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user