Merge branch 'develop' of https://github.com/explosion/spaCy into develop

This commit is contained in:
Matthew Honnibal 2020-08-27 03:22:11 +02:00
commit e1e1760fd6
8 changed files with 17 additions and 72 deletions

View File

@ -5,7 +5,7 @@ thinc>=8.0.0a30,<8.0.0a40
blis>=0.4.0,<0.5.0 blis>=0.4.0,<0.5.0
ml_datasets>=0.1.1 ml_datasets>=0.1.1
murmurhash>=0.28.0,<1.1.0 murmurhash>=0.28.0,<1.1.0
wasabi>=0.7.1,<1.1.0 wasabi>=0.8.0,<1.1.0
srsly>=2.1.0,<3.0.0 srsly>=2.1.0,<3.0.0
catalogue>=0.0.7,<1.1.0 catalogue>=0.0.7,<1.1.0
typer>=0.3.0,<0.4.0 typer>=0.3.0,<0.4.0

View File

@ -42,7 +42,7 @@ install_requires =
preshed>=3.0.2,<3.1.0 preshed>=3.0.2,<3.1.0
thinc>=8.0.0a30,<8.0.0a40 thinc>=8.0.0a30,<8.0.0a40
blis>=0.4.0,<0.5.0 blis>=0.4.0,<0.5.0
wasabi>=0.7.1,<1.1.0 wasabi>=0.8.0,<1.1.0
srsly>=2.1.0,<3.0.0 srsly>=2.1.0,<3.0.0
catalogue>=0.0.7,<1.1.0 catalogue>=0.0.7,<1.1.0
typer>=0.3.0,<0.4.0 typer>=0.3.0,<0.4.0

View File

@ -1,7 +1,7 @@
from typing import Optional, Dict, Any, Union from typing import Optional, Dict, Any, Union
import platform import platform
from pathlib import Path from pathlib import Path
from wasabi import Printer from wasabi import Printer, MarkdownRenderer
import srsly import srsly
from ._util import app, Arg, Opt from ._util import app, Arg, Opt
@ -97,12 +97,13 @@ def get_markdown(data: Dict[str, Any], title: Optional[str] = None) -> str:
title (str / None): Title, will be rendered as headline 2. title (str / None): Title, will be rendered as headline 2.
RETURNS (str): The Markdown string. RETURNS (str): The Markdown string.
""" """
markdown = [] md = MarkdownRenderer()
if title:
md.add(md.title(2, title))
items = []
for key, value in data.items(): for key, value in data.items():
if isinstance(value, str) and Path(value).exists(): if isinstance(value, str) and Path(value).exists():
continue continue
markdown.append(f"* **{key}:** {value}") items.append(f"{md.bold(f'{key}:')} {value}")
result = "\n{}\n".format("\n".join(markdown)) md.add(md.list(items))
if title: return f"\n{md.text}\n"
result = f"\n## {title}\n{result}"
return result

View File

@ -1,6 +1,5 @@
from typing import Iterable, Optional
from pathlib import Path from pathlib import Path
from wasabi import msg from wasabi import msg, MarkdownRenderer
from ...util import working_dir from ...util import working_dir
from .._util import project_cli, Arg, Opt, PROJECT_FILE, load_project_config from .._util import project_cli, Arg, Opt, PROJECT_FILE, load_project_config
@ -107,34 +106,3 @@ def project_document(
with output_file.open("w") as f: with output_file.open("w") as f:
f.write(content) f.write(content)
msg.good("Saved project documentation", output_file) msg.good("Saved project documentation", output_file)
class MarkdownRenderer:
    """Simple helper for generating raw Markdown.

    Rendered fragments are collected with `add` and joined into one document
    by the `text` property. The formatting helpers (`table`, `title`, `code`,
    `link`) only return strings; they do not add anything themselves.
    """

    def __init__(self, no_emoji: bool = False) -> None:
        """Create an empty renderer.

        no_emoji (bool): If True, `title` ignores its `emoji` argument.
        """
        self.data = []
        self.no_emoji = no_emoji

    @property
    def text(self) -> str:
        """RETURNS (str): All added fragments, separated by a blank line."""
        return "\n\n".join(self.data)

    def add(self, content: str) -> None:
        """Append a rendered fragment to the document.

        content (str): The Markdown fragment to add.
        """
        self.data.append(content)

    def table(self, data: Iterable[Iterable[str]], header: Iterable[str]) -> str:
        """Render a Markdown table.

        data (Iterable[Iterable[str]]): The rows, each an iterable of cells.
        header (Iterable[str]): The column titles.
        RETURNS (str): The header row, divider row and body rows.
        """
        # The header is needed twice (title row and divider row), so
        # materialize it once: a one-shot iterator would otherwise be
        # exhausted by the first pass and yield an empty divider.
        columns = list(header)
        head = f"| {' | '.join(columns)} |"
        divider = f"| {' | '.join('---' for _ in columns)} |"
        body = "\n".join(f"| {' | '.join(row)} |" for row in data)
        return f"{head}\n{divider}\n{body}"

    def title(self, level: int, text: str, emoji: Optional[str] = None) -> str:
        """Render a headline.

        level (int): The headline level, i.e. the number of leading '#'.
        text (str): The headline text.
        emoji (str / None): Optional prefix, skipped if `no_emoji` is set.
        RETURNS (str): The rendered headline.
        """
        prefix = f"{emoji} " if emoji and not self.no_emoji else ""
        return f"{'#' * level} {prefix}{text}"

    def code(self, text: str) -> str:
        """RETURNS (str): The text wrapped in backticks as inline code."""
        return f"`{text}`"

    def link(self, text: str, url: str) -> str:
        """RETURNS (str): A Markdown link with the given text and URL."""
        return f"[{text}]({url})"

View File

@ -273,10 +273,6 @@ class Errors:
"existing extension, set `force=True` on `{obj}.set_extension`.") "existing extension, set `force=True` on `{obj}.set_extension`.")
E091 = ("Invalid extension attribute {name}: expected callable or None, " E091 = ("Invalid extension attribute {name}: expected callable or None, "
"but got: {value}") "but got: {value}")
E092 = ("Could not find or assign name for word vectors. Ususally, the "
"name is read from the model's meta.json in vector.name. "
"Alternatively, it is built from the 'lang' and 'name' keys in "
"the meta.json. Vector names are required to avoid issue #1660.")
E093 = ("token.ent_iob values make invalid sequence: I without B\n{seq}") E093 = ("token.ent_iob values make invalid sequence: I without B\n{seq}")
E094 = ("Error reading line {line_num} in vectors file {loc}.") E094 = ("Error reading line {line_num} in vectors file {loc}.")
E095 = ("Can't write to frozen dictionary. This is likely an internal " E095 = ("Can't write to frozen dictionary. This is likely an internal "

View File

@ -1538,7 +1538,6 @@ class Language:
def deserialize_vocab(path: Path) -> None: def deserialize_vocab(path: Path) -> None:
if path.exists(): if path.exists():
self.vocab.from_disk(path) self.vocab.from_disk(path)
_fix_pretrained_vectors_name(self)
path = util.ensure_path(path) path = util.ensure_path(path)
deserializers = {} deserializers = {}
@ -1605,14 +1604,10 @@ class Language:
# from self.vocab.vectors, so set the name directly # from self.vocab.vectors, so set the name directly
self.vocab.vectors.name = data.get("vectors", {}).get("name") self.vocab.vectors.name = data.get("vectors", {}).get("name")
def deserialize_vocab(b):
self.vocab.from_bytes(b)
_fix_pretrained_vectors_name(self)
deserializers = {} deserializers = {}
deserializers["config.cfg"] = lambda b: self.config.from_bytes(b) deserializers["config.cfg"] = lambda b: self.config.from_bytes(b)
deserializers["meta.json"] = deserialize_meta deserializers["meta.json"] = deserialize_meta
deserializers["vocab"] = deserialize_vocab deserializers["vocab"] = self.vocab.from_bytes
deserializers["tokenizer"] = lambda b: self.tokenizer.from_bytes( deserializers["tokenizer"] = lambda b: self.tokenizer.from_bytes(
b, exclude=["vocab"] b, exclude=["vocab"]
) )
@ -1646,25 +1641,6 @@ class FactoryMeta:
default_score_weights: Optional[Dict[str, float]] = None # noqa: E704 default_score_weights: Optional[Dict[str, float]] = None # noqa: E704
def _fix_pretrained_vectors_name(nlp: Language) -> None:
    """Assign a name to the vocab's vectors and record it on the components.

    The name is taken from nlp.meta["vectors"]["name"] if present. Otherwise
    it is set to None when the vectors table has no size, or built as
    "{lang}_{name}.vectors" from the meta. The resulting name is then stored
    under cfg["deprecation_fixes"]["vectors_name"] of every pipeline
    component that has a dict `cfg`.

    nlp (Language): The pipeline object whose vectors name should be set.
    RAISES (ValueError): With Errors.E092 if the vectors are non-empty and
        no name can be read or built from the meta.
    """
    # TODO: Replace this once we handle vectors consistently as static
    # data
    has_explicit_name = "vectors" in nlp.meta and "name" in nlp.meta["vectors"]
    if has_explicit_name:
        nlp.vocab.vectors.name = nlp.meta["vectors"]["name"]
    elif not nlp.vocab.vectors.size:
        # Empty vectors table: nothing to name
        nlp.vocab.vectors.name = None
    elif "name" in nlp.meta and "lang" in nlp.meta:
        nlp.vocab.vectors.name = f"{nlp.meta['lang']}_{nlp.meta['name']}.vectors"
    else:
        raise ValueError(Errors.E092)
    for _name, component in nlp.pipeline:
        if hasattr(component, "cfg") and isinstance(component.cfg, dict):
            fixes = component.cfg.setdefault("deprecation_fixes", {})
            fixes["vectors_name"] = nlp.vocab.vectors.name
class DisabledPipes(list): class DisabledPipes(list):
"""Manager for temporary pipeline disabling.""" """Manager for temporary pipeline disabling."""

View File

@ -146,8 +146,12 @@ validation error with more details.
> #### Example > #### Example
> >
> ```cli > ```cli
> $ python -m spacy init fill-config base.cfg config.cfg > $ python -m spacy init fill-config base.cfg config.cfg --diff
> ``` > ```
>
> #### Example diff
>
> ![Screenshot of visual diff in terminal](../images/cli_init_fill-config_diff.jpg)
```cli ```cli
$ python -m spacy init fill-config [base_path] [output_file] [--diff] $ python -m spacy init fill-config [base_path] [output_file] [--diff]

Binary file not shown.

After

Width:  |  Height:  |  Size: 202 KiB