mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-27 09:44:36 +03:00
Custom warning if the doc_bin is too large (#8069)
* custom warning if the doc_bin is too large
* cleanup
* Update spacy/errors.py

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>

* fix numbering
* fixing numbering once more
* fixing this seems to be pretty hard

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
This commit is contained in:
parent
b120fb3511
commit
0dffc5d9e2
|
@@ -490,6 +490,11 @@ class Errors:
|
||||||
E202 = ("Unsupported alignment mode '{mode}'. Supported modes: {modes}.")
|
E202 = ("Unsupported alignment mode '{mode}'. Supported modes: {modes}.")
|
||||||
|
|
||||||
# New errors added in v3.x
|
# New errors added in v3.x
|
||||||
|
E870 = ("Could not serialize the DocBin because it is too large. Consider "
|
||||||
|
"splitting up your documents into several doc bins and serializing "
|
||||||
|
"each separately. spacy.Corpus.v1 will search recursively for all "
|
||||||
|
"*.spacy files if you provide a directory instead of a filename as "
|
||||||
|
"the 'path'.")
|
||||||
E871 = ("Error encountered in nlp.pipe with multiprocessing:\n\n{error}")
|
E871 = ("Error encountered in nlp.pipe with multiprocessing:\n\n{error}")
|
||||||
E872 = ("Unable to copy tokenizer from base model due to different "
|
E872 = ("Unable to copy tokenizer from base model due to different "
|
||||||
'tokenizer settings: current tokenizer config "{curr_config}" '
|
'tokenizer settings: current tokenizer config "{curr_config}" '
|
||||||
|
|
|
@@ -246,7 +246,10 @@ class DocBin:
|
||||||
"""
|
"""
|
||||||
path = ensure_path(path)
|
path = ensure_path(path)
|
||||||
with path.open("wb") as file_:
|
with path.open("wb") as file_:
|
||||||
|
try:
|
||||||
file_.write(self.to_bytes())
|
file_.write(self.to_bytes())
|
||||||
|
except ValueError:
|
||||||
|
raise ValueError(Errors.E870)
|
||||||
|
|
||||||
def from_disk(self, path: Union[str, Path]) -> "DocBin":
|
def from_disk(self, path: Union[str, Path]) -> "DocBin":
|
||||||
"""Load the DocBin from a file (typically called .spacy).
|
"""Load the DocBin from a file (typically called .spacy).
|
||||||
|
|
Loading…
Reference in New Issue
Block a user