Fix dump meta

This commit is contained in:
Matthew Honnibal 2024-09-07 00:46:48 +02:00
parent ae6910b09b
commit b4e60e3151

View File

@ -2223,14 +2223,14 @@ class Language:
DOCS: https://spacy.io/api/language#to_disk
"""
def _dump_meta(path, meta) -> None:
def _dump_meta(path) -> None:
"""Helper function for to_disk. It's not entirely clear why this is
necessary: we're seeing numpy.float32 values in the Ukrainian trf
model meta, maybe it'll happen elsewhere as well -- but it seems
to originate from the specific transformer code being used by that
model.
"""
meta = convert_recursive(lambda v: isinstance(v, numpy.float32), lambda v: float(v), dict(meta))
meta = convert_recursive(lambda v: isinstance(v, numpy.floating), lambda v: float(v), dict(self.meta))
srsly.write_json(path, meta)
path = util.ensure_path(path)
@ -2353,13 +2353,13 @@ class Language:
to originate from the specific transformer code being used by that
model.
"""
meta = convert_recursive(lambda v: isinstance(v, numpy.float32), lambda v: float(v), dict(self.meta))
meta = convert_recursive(lambda v: isinstance(v, numpy.floating), lambda v: float(v), dict(self.meta))
return srsly.json_dumps(meta)
serializers: Dict[str, Callable[[], bytes]] = {}
serializers["vocab"] = lambda: self.vocab.to_bytes(exclude=exclude)
serializers["tokenizer"] = lambda: self.tokenizer.to_bytes(exclude=["vocab"]) # type: ignore[union-attr]
serializers["meta.json"] = _dump_meta
serializers["meta.json"] = _dump_meta # type: ignore
serializers["config.cfg"] = lambda: self.config.to_bytes()
for name, proc in self._components:
if name in exclude: