Allow overriding meta from spacy.blank
parent 0f943157af
commit 7dfc4bc062
@@ -47,13 +47,17 @@ def load(


 def blank(
-    name: str, *, config: Union[Dict[str, Any], Config] = util.SimpleFrozenDict()
+    name: str,
+    *,
+    config: Union[Dict[str, Any], Config] = util.SimpleFrozenDict(),
+    meta: Dict[str, Any] = util.SimpleFrozenDict()
 ) -> Language:
     """Create a blank nlp object for a given language code.

     name (str): The language code, e.g. "en".
     config (Dict[str, Any] / Config): Optional config overrides.
+    meta (Dict[str, Any]): Overrides for nlp.meta.
     RETURNS (Language): The nlp object.
     """
     LangClass = util.get_lang_class(name)
-    return LangClass.from_config(config)
+    return LangClass.from_config(config, meta=meta)
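For illustration, a minimal usage sketch of the extended spacy.blank call; the config and meta values below are taken from the new test further down, not prescribed by the API:

    import spacy

    # Create a blank English pipeline, overriding both the config and nlp.meta.
    nlp = spacy.blank(
        "en",
        config={"training": {"dropout": 0.2}},  # config override, as before
        meta={"name": "my_custom_model"},       # new: applied to nlp.meta
    )
    assert nlp.meta["name"] == "my_custom_model"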
@@ -1458,6 +1458,7 @@ class Language:
         vocab: Union[Vocab, bool] = True,
         disable: Iterable[str] = SimpleFrozenList(),
         exclude: Iterable[str] = SimpleFrozenList(),
+        meta: Dict[str, Any] = SimpleFrozenDict(),
         auto_fill: bool = True,
         validate: bool = True,
     ) -> "Language":
@@ -1472,6 +1473,7 @@ class Language:
             explicitly enable them by calling nlp.enable_pipe.
         exclude (Iterable[str]): Names of pipeline components to exclude.
             Excluded components won't be loaded.
+        meta (Dict[str, Any]): Meta overrides for nlp.meta.
         auto_fill (bool): Automatically fill in missing values in config based
             on defaults and function argument annotations.
         validate (bool): Validate the component config and arguments against
@@ -1525,7 +1527,7 @@ class Language:
         # inside stuff like the spacy train function. If we loaded them here,
         # then we would load them twice at runtime: once when we make from config,
         # and then again when we load from disk.
-        nlp = lang_cls(vocab=vocab, create_tokenizer=create_tokenizer)
+        nlp = lang_cls(vocab=vocab, create_tokenizer=create_tokenizer, meta=meta)
         if after_creation is not None:
             nlp = after_creation(nlp)
         if not isinstance(nlp, cls):
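The same override is available when calling Language.from_config directly. A hedged sketch, assuming a complete pipeline config is already at hand; the meta value is illustrative:

    from spacy.lang.en import English

    nlp = English()
    config = nlp.config  # a fully resolved pipeline config
    # Rebuild the pipeline from its config, overriding selected nlp.meta entries.
    nlp2 = English.from_config(config, meta={"name": "rebuilt_model"})
    assert nlp2.meta["name"] == "rebuilt_model"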
@@ -6,6 +6,7 @@ from spacy.vocab import Vocab
 from spacy.training import Example
 from spacy.lang.en import English
 from spacy.util import registry
+import spacy

 from .util import add_vecs_to_vocab, assert_docs_equal

@@ -266,3 +267,13 @@ def test_language_custom_tokenizer():
     assert [t.text for t in doc] == ["_hello", "_world"]
     doc = list(nlp.pipe(["hello world"]))[0]
     assert [t.text for t in doc] == ["_hello", "_world"]
+
+
+def test_spacy_blank():
+    nlp = spacy.blank("en")
+    assert nlp.config["training"]["dropout"] == 0.1
+    config = {"training": {"dropout": 0.2}}
+    meta = {"name": "my_custom_model"}
+    nlp = spacy.blank("en", config=config, meta=meta)
+    assert nlp.config["training"]["dropout"] == 0.2
+    assert nlp.meta["name"] == "my_custom_model"