Allow overriding meta from spacy.blank

This commit is contained in:
Ines Montani 2020-09-15 11:12:12 +02:00
parent 0f943157af
commit 7dfc4bc062
3 changed files with 20 additions and 3 deletions

View File

@ -47,13 +47,17 @@ def load(
def blank(
    name: str,
    *,
    config: Union[Dict[str, Any], Config] = util.SimpleFrozenDict(),
    meta: Dict[str, Any] = util.SimpleFrozenDict(),
) -> Language:
    """Create a blank nlp object for a given language code.

    name (str): The language code, e.g. "en".
    config (Dict[str, Any] / Config): Optional config overrides.
    meta (Dict[str, Any]): Overrides for nlp.meta.
    RETURNS (Language): The nlp object.
    """
    # Look up the language class for the given code, then build the nlp
    # object from the config, passing the meta overrides along.
    LangClass = util.get_lang_class(name)
    return LangClass.from_config(config, meta=meta)

View File

@ -1458,6 +1458,7 @@ class Language:
vocab: Union[Vocab, bool] = True, vocab: Union[Vocab, bool] = True,
disable: Iterable[str] = SimpleFrozenList(), disable: Iterable[str] = SimpleFrozenList(),
exclude: Iterable[str] = SimpleFrozenList(), exclude: Iterable[str] = SimpleFrozenList(),
meta: Dict[str, Any] = SimpleFrozenDict(),
auto_fill: bool = True, auto_fill: bool = True,
validate: bool = True, validate: bool = True,
) -> "Language": ) -> "Language":
@ -1472,6 +1473,7 @@ class Language:
explicitly enable them by calling nlp.enable_pipe. explicitly enable them by calling nlp.enable_pipe.
exclude (Iterable[str]): Names of pipeline components to exclude. exclude (Iterable[str]): Names of pipeline components to exclude.
Excluded components won't be loaded. Excluded components won't be loaded.
meta (Dict[str, Any]): Meta overrides for nlp.meta.
auto_fill (bool): Automatically fill in missing values in config based auto_fill (bool): Automatically fill in missing values in config based
on defaults and function argument annotations. on defaults and function argument annotations.
validate (bool): Validate the component config and arguments against validate (bool): Validate the component config and arguments against
@ -1525,7 +1527,7 @@ class Language:
# inside stuff like the spacy train function. If we loaded them here, # inside stuff like the spacy train function. If we loaded them here,
# then we would load them twice at runtime: once when we make from config, # then we would load them twice at runtime: once when we make from config,
# and then again when we load from disk. # and then again when we load from disk.
nlp = lang_cls(vocab=vocab, create_tokenizer=create_tokenizer) nlp = lang_cls(vocab=vocab, create_tokenizer=create_tokenizer, meta=meta)
if after_creation is not None: if after_creation is not None:
nlp = after_creation(nlp) nlp = after_creation(nlp)
if not isinstance(nlp, cls): if not isinstance(nlp, cls):

View File

@ -6,6 +6,7 @@ from spacy.vocab import Vocab
from spacy.training import Example from spacy.training import Example
from spacy.lang.en import English from spacy.lang.en import English
from spacy.util import registry from spacy.util import registry
import spacy
from .util import add_vecs_to_vocab, assert_docs_equal from .util import add_vecs_to_vocab, assert_docs_equal
@ -266,3 +267,13 @@ def test_language_custom_tokenizer():
assert [t.text for t in doc] == ["_hello", "_world"] assert [t.text for t in doc] == ["_hello", "_world"]
doc = list(nlp.pipe(["hello world"]))[0] doc = list(nlp.pipe(["hello world"]))[0]
assert [t.text for t in doc] == ["_hello", "_world"] assert [t.text for t in doc] == ["_hello", "_world"]
def test_spacy_blank():
    """Check spacy.blank defaults and its config and meta overrides."""
    # Without overrides, the default training dropout is in effect.
    default_nlp = spacy.blank("en")
    assert default_nlp.config["training"]["dropout"] == 0.1
    # Both the config and the meta can be overridden at creation time.
    custom_nlp = spacy.blank(
        "en",
        config={"training": {"dropout": 0.2}},
        meta={"name": "my_custom_model"},
    )
    training_cfg = custom_nlp.config["training"]
    assert training_cfg["dropout"] == 0.2
    assert custom_nlp.meta["name"] == "my_custom_model"