Allow overriding meta from spacy.blank

This commit is contained in:
Ines Montani 2020-09-15 11:12:12 +02:00
parent 0f943157af
commit 7dfc4bc062
3 changed files with 20 additions and 3 deletions

View File

@ -47,13 +47,17 @@ def load(
def blank(
    name: str,
    *,
    config: Union[Dict[str, Any], Config] = util.SimpleFrozenDict(),
    meta: Dict[str, Any] = util.SimpleFrozenDict(),
) -> Language:
    """Create a blank nlp object for a given language code.

    name (str): The language code, e.g. "en".
    config (Dict[str, Any] / Config): Optional config overrides.
    meta (Dict[str, Any]): Overrides for nlp.meta.
    RETURNS (Language): The nlp object.
    """
    # Resolve the language class for the code, then build the pipeline from
    # the config, forwarding the meta overrides to from_config.
    LangClass = util.get_lang_class(name)
    return LangClass.from_config(config, meta=meta)

View File

@ -1458,6 +1458,7 @@ class Language:
vocab: Union[Vocab, bool] = True,
disable: Iterable[str] = SimpleFrozenList(),
exclude: Iterable[str] = SimpleFrozenList(),
meta: Dict[str, Any] = SimpleFrozenDict(),
auto_fill: bool = True,
validate: bool = True,
) -> "Language":
@ -1472,6 +1473,7 @@ class Language:
explicitly enable them by calling nlp.enable_pipe.
exclude (Iterable[str]): Names of pipeline components to exclude.
Excluded components won't be loaded.
meta (Dict[str, Any]): Meta overrides for nlp.meta.
auto_fill (bool): Automatically fill in missing values in config based
on defaults and function argument annotations.
validate (bool): Validate the component config and arguments against
@ -1525,7 +1527,7 @@ class Language:
# inside stuff like the spacy train function. If we loaded them here,
# then we would load them twice at runtime: once when we make from config,
# and then again when we load from disk.
nlp = lang_cls(vocab=vocab, create_tokenizer=create_tokenizer)
nlp = lang_cls(vocab=vocab, create_tokenizer=create_tokenizer, meta=meta)
if after_creation is not None:
nlp = after_creation(nlp)
if not isinstance(nlp, cls):

View File

@ -6,6 +6,7 @@ from spacy.vocab import Vocab
from spacy.training import Example
from spacy.lang.en import English
from spacy.util import registry
import spacy
from .util import add_vecs_to_vocab, assert_docs_equal
@ -266,3 +267,13 @@ def test_language_custom_tokenizer():
assert [t.text for t in doc] == ["_hello", "_world"]
doc = list(nlp.pipe(["hello world"]))[0]
assert [t.text for t in doc] == ["_hello", "_world"]
def test_spacy_blank():
    """spacy.blank applies config and meta overrides to the created pipeline."""
    # Without overrides, the default training dropout is in effect.
    default_nlp = spacy.blank("en")
    assert default_nlp.config["training"]["dropout"] == 0.1
    # Overrides passed as keyword arguments must be reflected on the nlp object.
    custom_nlp = spacy.blank(
        "en",
        config={"training": {"dropout": 0.2}},
        meta={"name": "my_custom_model"},
    )
    assert custom_nlp.config["training"]["dropout"] == 0.2
    assert custom_nlp.meta["name"] == "my_custom_model"