mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 18:26:30 +03:00
Custom warning if the doc_bin is too large (#8069)
* custom warning if the doc_bin is too large
* cleanup
* Update spacy/errors.py

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>

* fix numbering
* fixing numbering once more
* fixing this seems to be pretty hard

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
This commit is contained in:
parent
b120fb3511
commit
0dffc5d9e2
|
@@ -490,6 +490,11 @@ class Errors:
|
|||
E202 = ("Unsupported alignment mode '{mode}'. Supported modes: {modes}.")
|
||||
|
||||
# New errors added in v3.x
|
||||
E870 = ("Could not serialize the DocBin because it is too large. Consider "
|
||||
"splitting up your documents into several doc bins and serializing "
|
||||
"each separately. spacy.Corpus.v1 will search recursively for all "
|
||||
"*.spacy files if you provide a directory instead of a filename as "
|
||||
"the 'path'.")
|
||||
E871 = ("Error encountered in nlp.pipe with multiprocessing:\n\n{error}")
|
||||
E872 = ("Unable to copy tokenizer from base model due to different "
|
||||
'tokenizer settings: current tokenizer config "{curr_config}" '
|
||||
|
|
|
@@ -246,7 +246,10 @@ class DocBin:
|
|||
"""
|
||||
path = ensure_path(path)
|
||||
with path.open("wb") as file_:
|
||||
file_.write(self.to_bytes())
|
||||
try:
|
||||
file_.write(self.to_bytes())
|
||||
except ValueError:
|
||||
raise ValueError(Errors.E870)
|
||||
|
||||
def from_disk(self, path: Union[str, Path]) -> "DocBin":
|
||||
"""Load the DocBin from a file (typically called .spacy).
|
||||
|
|
Loading…
Reference in New Issue
Block a user