Allow overriding meta from spacy.blank

parent 0f943157af
commit 7dfc4bc062
spacy/__init__.py
@@ -47,13 +47,17 @@ def load(


 def blank(
-    name: str, *, config: Union[Dict[str, Any], Config] = util.SimpleFrozenDict()
+    name: str,
+    *,
+    config: Union[Dict[str, Any], Config] = util.SimpleFrozenDict(),
+    meta: Dict[str, Any] = util.SimpleFrozenDict()
 ) -> Language:
     """Create a blank nlp object for a given language code.

     name (str): The language code, e.g. "en".
     config (Dict[str, Any] / Config): Optional config overrides.
+    meta (Dict[str, Any]): Overrides for nlp.meta.
     RETURNS (Language): The nlp object.
     """
     LangClass = util.get_lang_class(name)
-    return LangClass.from_config(config)
+    return LangClass.from_config(config, meta=meta)
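With this change, callers can pass meta overrides directly when creating a blank pipeline instead of mutating nlp.meta afterwards. A minimal usage sketch of the new call pattern, mirroring the test added below (the "my_custom_model" name is the illustrative value used there):

import spacy

# spacy.blank now forwards `meta` to Language.from_config,
# so nlp.meta picks up the overrides at creation time.
nlp = spacy.blank(
    "en",
    config={"training": {"dropout": 0.2}},  # optional config overrides (as before)
    meta={"name": "my_custom_model"},       # new: meta overrides
)
assert nlp.meta["name"] == "my_custom_model"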
spacy/language.py
@@ -1458,6 +1458,7 @@ class Language:
         vocab: Union[Vocab, bool] = True,
         disable: Iterable[str] = SimpleFrozenList(),
         exclude: Iterable[str] = SimpleFrozenList(),
+        meta: Dict[str, Any] = SimpleFrozenDict(),
         auto_fill: bool = True,
         validate: bool = True,
     ) -> "Language":
@@ -1472,6 +1473,7 @@ class Language:
            explicitly enable them by calling nlp.enable_pipe.
        exclude (Iterable[str]): Names of pipeline components to exclude.
            Excluded components won't be loaded.
+       meta (Dict[str, Any]): Meta overrides for nlp.meta.
        auto_fill (bool): Automatically fill in missing values in config based
            on defaults and function argument annotations.
        validate (bool): Validate the component config and arguments against
@@ -1525,7 +1527,7 @@ class Language:
        # inside stuff like the spacy train function. If we loaded them here,
        # then we would load them twice at runtime: once when we make from config,
        # and then again when we load from disk.
-       nlp = lang_cls(vocab=vocab, create_tokenizer=create_tokenizer)
+       nlp = lang_cls(vocab=vocab, create_tokenizer=create_tokenizer, meta=meta)
        if after_creation is not None:
            nlp = after_creation(nlp)
        if not isinstance(nlp, cls):
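The same override is also exposed one level down on Language.from_config, which spacy.blank delegates to. A small sketch under the assumption that a partial config is acceptable because auto_fill is left at its default (the "blank_with_meta" value is illustrative, not from the diff):

from spacy.lang.en import English

# meta passed to from_config is handed to the Language constructor,
# so it is applied before any after_creation callback runs.
nlp = English.from_config(
    {},                                # partial config; auto_fill completes it
    meta={"name": "blank_with_meta"},  # hypothetical meta override for illustration
)
print(nlp.meta["name"])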
spacy/tests/test_language.py
@@ -6,6 +6,7 @@ from spacy.vocab import Vocab
 from spacy.training import Example
 from spacy.lang.en import English
 from spacy.util import registry
+import spacy

 from .util import add_vecs_to_vocab, assert_docs_equal

@@ -266,3 +267,13 @@ def test_language_custom_tokenizer():
     assert [t.text for t in doc] == ["_hello", "_world"]
     doc = list(nlp.pipe(["hello world"]))[0]
     assert [t.text for t in doc] == ["_hello", "_world"]
+
+
+def test_spacy_blank():
+    nlp = spacy.blank("en")
+    assert nlp.config["training"]["dropout"] == 0.1
+    config = {"training": {"dropout": 0.2}}
+    meta = {"name": "my_custom_model"}
+    nlp = spacy.blank("en", config=config, meta=meta)
+    assert nlp.config["training"]["dropout"] == 0.2
+    assert nlp.meta["name"] == "my_custom_model"