From 57cbac78f459acb00a0668715852c0fa4965365b Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Sat, 14 Sep 2024 13:13:05 +0200 Subject: [PATCH] Fix numpy floating values in meta.json for serialization --- spacy/language.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/spacy/language.py b/spacy/language.py index 18a58afd5..7f1d6a0d2 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -33,6 +33,7 @@ from typing import ( import srsly from cymem.cymem import Pool from thinc.api import Config, CupyOps, Optimizer, get_current_ops +from thinc.util import convert_recursive from . import about, ty, util from .compat import Literal @@ -2141,7 +2142,7 @@ class Language: serializers["tokenizer"] = lambda p: self.tokenizer.to_disk( # type: ignore[union-attr] p, exclude=["vocab"] ) - serializers["meta.json"] = lambda p: srsly.write_json(p, self.meta) + serializers["meta.json"] = lambda p: srsly.write_json(p, _replace_numpy_floats(self.meta)) serializers["config.cfg"] = lambda p: self.config.to_disk(p) for name, proc in self._components: if name in exclude: @@ -2255,7 +2256,7 @@ class Language: serializers: Dict[str, Callable[[], bytes]] = {} serializers["vocab"] = lambda: self.vocab.to_bytes(exclude=exclude) serializers["tokenizer"] = lambda: self.tokenizer.to_bytes(exclude=["vocab"]) # type: ignore[union-attr] - serializers["meta.json"] = lambda: srsly.json_dumps(self.meta) + serializers["meta.json"] = lambda: srsly.json_dumps(_replace_numpy_floats(self.meta)) serializers["config.cfg"] = lambda: self.config.to_bytes() for name, proc in self._components: if name in exclude: @@ -2306,6 +2307,10 @@ class Language: return self +def _replace_numpy_floats(meta_dict: dict) -> dict: + return convert_recursive(lambda v: isinstance(v, numpy.floating), lambda v: float(v), dict(meta_dict)) + + @dataclass class FactoryMeta: """Dataclass containing information about a component and its defaults