From 431c2ecd784cd204a395ea75b457e5574dcd599c Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Sun, 2 Apr 2023 11:56:21 +0200 Subject: [PATCH] Auto-fill [nlp] on load from config and from bytes/disk --- spacy/language.py | 24 +++++++++++++++--------- spacy/util.py | 2 +- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/spacy/language.py b/spacy/language.py index 559e245c2..0606a57ab 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -2101,6 +2101,13 @@ class Language: DOCS: https://spacy.io/api/language#from_disk """ + def deserialize_config(path: Path) -> None: + if path.exists(): + config = Config().from_disk( + path, interpolate=False, overrides=overrides + ) + self.config.merge(config) + def deserialize_meta(path: Path) -> None: if path.exists(): data = srsly.read_json(path) @@ -2115,12 +2122,9 @@ class Language: path = util.ensure_path(path) deserializers = {} - if Path(path / "config.cfg").exists(): # type: ignore[operator] - deserializers["config.cfg"] = lambda p: self.config.from_disk( - p, interpolate=False, overrides=overrides - ) - deserializers["meta.json"] = deserialize_meta # type: ignore[assignment] - deserializers["vocab"] = deserialize_vocab # type: ignore[assignment] + deserializers["config.cfg"] = deserialize_config + deserializers["meta.json"] = deserialize_meta + deserializers["vocab"] = deserialize_vocab deserializers["tokenizer"] = lambda p: self.tokenizer.from_disk( # type: ignore[union-attr] p, exclude=["vocab"] ) @@ -2173,6 +2177,10 @@ class Language: DOCS: https://spacy.io/api/language#from_bytes """ + def deserialize_config(b): + config = Config().from_bytes(b, interpolate=False) + self.config.merge(config) + def deserialize_meta(b): data = srsly.json_loads(b) self.meta.update(data) @@ -2181,9 +2189,7 @@ class Language: self.vocab.vectors.name = data.get("vectors", {}).get("name") deserializers: Dict[str, Callable[[bytes], Any]] = {} - deserializers["config.cfg"] = lambda b: self.config.from_bytes( - b, interpolate=False - ) + deserializers["config.cfg"] = deserialize_config deserializers["meta.json"] = deserialize_meta deserializers["vocab"] = lambda b: self.vocab.from_bytes(b, exclude=exclude) deserializers["tokenizer"] = lambda b: self.tokenizer.from_bytes( # type: ignore[union-attr] diff --git a/spacy/util.py b/spacy/util.py index 8cc89217d..d0e2fb83f 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -532,7 +532,7 @@ def load_model_from_config( disable: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES, enable: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES, exclude: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES, - auto_fill: bool = False, + auto_fill: bool = True, validate: bool = True, ) -> "Language": """Create an nlp object from a config. Expects the full config file including