diff --git a/spacy/__init__.py b/spacy/__init__.py
index d640e2bbc..96487ec2a 100644
--- a/spacy/__init__.py
+++ b/spacy/__init__.py
@@ -47,13 +47,17 @@ def load(
 
 
 def blank(
-    name: str, *, config: Union[Dict[str, Any], Config] = util.SimpleFrozenDict()
+    name: str,
+    *,
+    config: Union[Dict[str, Any], Config] = util.SimpleFrozenDict(),
+    meta: Dict[str, Any] = util.SimpleFrozenDict()
 ) -> Language:
     """Create a blank nlp object for a given language code.
 
     name (str): The language code, e.g. "en".
     config (Dict[str, Any] / Config): Optional config overrides.
+    meta (Dict[str, Any]): Overrides for nlp.meta.
     RETURNS (Language): The nlp object.
     """
     LangClass = util.get_lang_class(name)
-    return LangClass.from_config(config)
+    return LangClass.from_config(config, meta=meta)
diff --git a/spacy/language.py b/spacy/language.py
index 905cdca36..e71f4215f 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -1458,6 +1458,7 @@ class Language:
         vocab: Union[Vocab, bool] = True,
         disable: Iterable[str] = SimpleFrozenList(),
         exclude: Iterable[str] = SimpleFrozenList(),
+        meta: Dict[str, Any] = SimpleFrozenDict(),
         auto_fill: bool = True,
         validate: bool = True,
     ) -> "Language":
@@ -1472,6 +1473,7 @@ class Language:
             explicitly enable them by calling nlp.enable_pipe.
         exclude (Iterable[str]): Names of pipeline components to exclude.
             Excluded components won't be loaded.
+        meta (Dict[str, Any]): Meta overrides for nlp.meta.
         auto_fill (bool): Automatically fill in missing values in config based
             on defaults and function argument annotations.
         validate (bool): Validate the component config and arguments against
@@ -1525,7 +1527,7 @@ class Language:
         # inside stuff like the spacy train function. If we loaded them here,
         # then we would load them twice at runtime: once when we make from config,
         # and then again when we load from disk.
-        nlp = lang_cls(vocab=vocab, create_tokenizer=create_tokenizer)
+        nlp = lang_cls(vocab=vocab, create_tokenizer=create_tokenizer, meta=meta)
         if after_creation is not None:
             nlp = after_creation(nlp)
         if not isinstance(nlp, cls):
diff --git a/spacy/tests/test_language.py b/spacy/tests/test_language.py
index 840d878c2..1156d26b0 100644
--- a/spacy/tests/test_language.py
+++ b/spacy/tests/test_language.py
@@ -6,6 +6,7 @@ from spacy.vocab import Vocab
 from spacy.training import Example
 from spacy.lang.en import English
 from spacy.util import registry
+import spacy
 
 from .util import add_vecs_to_vocab, assert_docs_equal
 
@@ -266,3 +267,13 @@ def test_language_custom_tokenizer():
     assert [t.text for t in doc] == ["_hello", "_world"]
     doc = list(nlp.pipe(["hello world"]))[0]
     assert [t.text for t in doc] == ["_hello", "_world"]
+
+
+def test_spacy_blank():
+    nlp = spacy.blank("en")
+    assert nlp.config["training"]["dropout"] == 0.1
+    config = {"training": {"dropout": 0.2}}
+    meta = {"name": "my_custom_model"}
+    nlp = spacy.blank("en", config=config, meta=meta)
+    assert nlp.config["training"]["dropout"] == 0.2
+    assert nlp.meta["name"] == "my_custom_model"