mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-27 01:34:30 +03:00
Merge branch 'develop' of https://github.com/explosion/spaCy into develop
This commit is contained in:
commit
e1e1760fd6
|
@ -5,7 +5,7 @@ thinc>=8.0.0a30,<8.0.0a40
|
||||||
blis>=0.4.0,<0.5.0
|
blis>=0.4.0,<0.5.0
|
||||||
ml_datasets>=0.1.1
|
ml_datasets>=0.1.1
|
||||||
murmurhash>=0.28.0,<1.1.0
|
murmurhash>=0.28.0,<1.1.0
|
||||||
wasabi>=0.7.1,<1.1.0
|
wasabi>=0.8.0,<1.1.0
|
||||||
srsly>=2.1.0,<3.0.0
|
srsly>=2.1.0,<3.0.0
|
||||||
catalogue>=0.0.7,<1.1.0
|
catalogue>=0.0.7,<1.1.0
|
||||||
typer>=0.3.0,<0.4.0
|
typer>=0.3.0,<0.4.0
|
||||||
|
|
|
@ -42,7 +42,7 @@ install_requires =
|
||||||
preshed>=3.0.2,<3.1.0
|
preshed>=3.0.2,<3.1.0
|
||||||
thinc>=8.0.0a30,<8.0.0a40
|
thinc>=8.0.0a30,<8.0.0a40
|
||||||
blis>=0.4.0,<0.5.0
|
blis>=0.4.0,<0.5.0
|
||||||
wasabi>=0.7.1,<1.1.0
|
wasabi>=0.8.0,<1.1.0
|
||||||
srsly>=2.1.0,<3.0.0
|
srsly>=2.1.0,<3.0.0
|
||||||
catalogue>=0.0.7,<1.1.0
|
catalogue>=0.0.7,<1.1.0
|
||||||
typer>=0.3.0,<0.4.0
|
typer>=0.3.0,<0.4.0
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
from typing import Optional, Dict, Any, Union
|
from typing import Optional, Dict, Any, Union
|
||||||
import platform
|
import platform
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from wasabi import Printer
|
from wasabi import Printer, MarkdownRenderer
|
||||||
import srsly
|
import srsly
|
||||||
|
|
||||||
from ._util import app, Arg, Opt
|
from ._util import app, Arg, Opt
|
||||||
|
@ -97,12 +97,13 @@ def get_markdown(data: Dict[str, Any], title: Optional[str] = None) -> str:
|
||||||
title (str / None): Title, will be rendered as headline 2.
|
title (str / None): Title, will be rendered as headline 2.
|
||||||
RETURNS (str): The Markdown string.
|
RETURNS (str): The Markdown string.
|
||||||
"""
|
"""
|
||||||
markdown = []
|
md = MarkdownRenderer()
|
||||||
|
if title:
|
||||||
|
md.add(md.title(2, title))
|
||||||
|
items = []
|
||||||
for key, value in data.items():
|
for key, value in data.items():
|
||||||
if isinstance(value, str) and Path(value).exists():
|
if isinstance(value, str) and Path(value).exists():
|
||||||
continue
|
continue
|
||||||
markdown.append(f"* **{key}:** {value}")
|
items.append(f"{md.bold(f'{key}:')} {value}")
|
||||||
result = "\n{}\n".format("\n".join(markdown))
|
md.add(md.list(items))
|
||||||
if title:
|
return f"\n{md.text}\n"
|
||||||
result = f"\n## {title}\n{result}"
|
|
||||||
return result
|
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
from typing import Iterable, Optional
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from wasabi import msg
|
from wasabi import msg, MarkdownRenderer
|
||||||
|
|
||||||
from ...util import working_dir
|
from ...util import working_dir
|
||||||
from .._util import project_cli, Arg, Opt, PROJECT_FILE, load_project_config
|
from .._util import project_cli, Arg, Opt, PROJECT_FILE, load_project_config
|
||||||
|
@ -107,34 +106,3 @@ def project_document(
|
||||||
with output_file.open("w") as f:
|
with output_file.open("w") as f:
|
||||||
f.write(content)
|
f.write(content)
|
||||||
msg.good("Saved project documentation", output_file)
|
msg.good("Saved project documentation", output_file)
|
||||||
|
|
||||||
|
|
||||||
class MarkdownRenderer:
|
|
||||||
"""Simple helper for generating raw Markdown."""
|
|
||||||
|
|
||||||
def __init__(self, no_emoji: bool = False):
|
|
||||||
self.data = []
|
|
||||||
self.no_emoji = no_emoji
|
|
||||||
|
|
||||||
@property
|
|
||||||
def text(self):
|
|
||||||
return "\n\n".join(self.data)
|
|
||||||
|
|
||||||
def add(self, content: str) -> None:
|
|
||||||
self.data.append(content)
|
|
||||||
|
|
||||||
def table(self, data: Iterable[Iterable[str]], header: Iterable[str]) -> str:
|
|
||||||
head = f"| {' | '.join(header)} |"
|
|
||||||
divider = f"| {' | '.join('---' for _ in header)} |"
|
|
||||||
body = "\n".join(f"| {' | '.join(row)} |" for row in data)
|
|
||||||
return f"{head}\n{divider}\n{body}"
|
|
||||||
|
|
||||||
def title(self, level: int, text: str, emoji: Optional[str] = None) -> str:
|
|
||||||
prefix = f"{emoji} " if emoji and not self.no_emoji else ""
|
|
||||||
return f"{'#' * level} {prefix}{text}"
|
|
||||||
|
|
||||||
def code(self, text: str) -> str:
|
|
||||||
return f"`{text}`"
|
|
||||||
|
|
||||||
def link(self, text: str, url: str) -> str:
|
|
||||||
return f"[{text}]({url})"
|
|
||||||
|
|
|
@ -273,10 +273,6 @@ class Errors:
|
||||||
"existing extension, set `force=True` on `{obj}.set_extension`.")
|
"existing extension, set `force=True` on `{obj}.set_extension`.")
|
||||||
E091 = ("Invalid extension attribute {name}: expected callable or None, "
|
E091 = ("Invalid extension attribute {name}: expected callable or None, "
|
||||||
"but got: {value}")
|
"but got: {value}")
|
||||||
E092 = ("Could not find or assign name for word vectors. Ususally, the "
|
|
||||||
"name is read from the model's meta.json in vector.name. "
|
|
||||||
"Alternatively, it is built from the 'lang' and 'name' keys in "
|
|
||||||
"the meta.json. Vector names are required to avoid issue #1660.")
|
|
||||||
E093 = ("token.ent_iob values make invalid sequence: I without B\n{seq}")
|
E093 = ("token.ent_iob values make invalid sequence: I without B\n{seq}")
|
||||||
E094 = ("Error reading line {line_num} in vectors file {loc}.")
|
E094 = ("Error reading line {line_num} in vectors file {loc}.")
|
||||||
E095 = ("Can't write to frozen dictionary. This is likely an internal "
|
E095 = ("Can't write to frozen dictionary. This is likely an internal "
|
||||||
|
|
|
@ -1538,7 +1538,6 @@ class Language:
|
||||||
def deserialize_vocab(path: Path) -> None:
|
def deserialize_vocab(path: Path) -> None:
|
||||||
if path.exists():
|
if path.exists():
|
||||||
self.vocab.from_disk(path)
|
self.vocab.from_disk(path)
|
||||||
_fix_pretrained_vectors_name(self)
|
|
||||||
|
|
||||||
path = util.ensure_path(path)
|
path = util.ensure_path(path)
|
||||||
deserializers = {}
|
deserializers = {}
|
||||||
|
@ -1605,14 +1604,10 @@ class Language:
|
||||||
# from self.vocab.vectors, so set the name directly
|
# from self.vocab.vectors, so set the name directly
|
||||||
self.vocab.vectors.name = data.get("vectors", {}).get("name")
|
self.vocab.vectors.name = data.get("vectors", {}).get("name")
|
||||||
|
|
||||||
def deserialize_vocab(b):
|
|
||||||
self.vocab.from_bytes(b)
|
|
||||||
_fix_pretrained_vectors_name(self)
|
|
||||||
|
|
||||||
deserializers = {}
|
deserializers = {}
|
||||||
deserializers["config.cfg"] = lambda b: self.config.from_bytes(b)
|
deserializers["config.cfg"] = lambda b: self.config.from_bytes(b)
|
||||||
deserializers["meta.json"] = deserialize_meta
|
deserializers["meta.json"] = deserialize_meta
|
||||||
deserializers["vocab"] = deserialize_vocab
|
deserializers["vocab"] = self.vocab.from_bytes
|
||||||
deserializers["tokenizer"] = lambda b: self.tokenizer.from_bytes(
|
deserializers["tokenizer"] = lambda b: self.tokenizer.from_bytes(
|
||||||
b, exclude=["vocab"]
|
b, exclude=["vocab"]
|
||||||
)
|
)
|
||||||
|
@ -1646,25 +1641,6 @@ class FactoryMeta:
|
||||||
default_score_weights: Optional[Dict[str, float]] = None # noqa: E704
|
default_score_weights: Optional[Dict[str, float]] = None # noqa: E704
|
||||||
|
|
||||||
|
|
||||||
def _fix_pretrained_vectors_name(nlp: Language) -> None:
|
|
||||||
# TODO: Replace this once we handle vectors consistently as static
|
|
||||||
# data
|
|
||||||
if "vectors" in nlp.meta and "name" in nlp.meta["vectors"]:
|
|
||||||
nlp.vocab.vectors.name = nlp.meta["vectors"]["name"]
|
|
||||||
elif not nlp.vocab.vectors.size:
|
|
||||||
nlp.vocab.vectors.name = None
|
|
||||||
elif "name" in nlp.meta and "lang" in nlp.meta:
|
|
||||||
vectors_name = f"{nlp.meta['lang']}_{nlp.meta['name']}.vectors"
|
|
||||||
nlp.vocab.vectors.name = vectors_name
|
|
||||||
else:
|
|
||||||
raise ValueError(Errors.E092)
|
|
||||||
for name, proc in nlp.pipeline:
|
|
||||||
if not hasattr(proc, "cfg") or not isinstance(proc.cfg, dict):
|
|
||||||
continue
|
|
||||||
proc.cfg.setdefault("deprecation_fixes", {})
|
|
||||||
proc.cfg["deprecation_fixes"]["vectors_name"] = nlp.vocab.vectors.name
|
|
||||||
|
|
||||||
|
|
||||||
class DisabledPipes(list):
|
class DisabledPipes(list):
|
||||||
"""Manager for temporary pipeline disabling."""
|
"""Manager for temporary pipeline disabling."""
|
||||||
|
|
||||||
|
|
|
@ -146,8 +146,12 @@ validation error with more details.
|
||||||
> #### Example
|
> #### Example
|
||||||
>
|
>
|
||||||
> ```cli
|
> ```cli
|
||||||
> $ python -m spacy init fill-config base.cfg config.cfg
|
> $ python -m spacy init fill-config base.cfg config.cfg --diff
|
||||||
> ```
|
> ```
|
||||||
|
>
|
||||||
|
> #### Example diff
|
||||||
|
>
|
||||||
|
> ![Screenshot of visual diff in terminal](../images/cli_init_fill-config_diff.jpg)
|
||||||
|
|
||||||
```cli
|
```cli
|
||||||
$ python -m spacy init fill-config [base_path] [output_file] [--diff]
|
$ python -m spacy init fill-config [base_path] [output_file] [--diff]
|
||||||
|
|
BIN
website/docs/images/cli_init_fill-config_diff.jpg
Normal file
BIN
website/docs/images/cli_init_fill-config_diff.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 202 KiB |
Loading…
Reference in New Issue
Block a user