Add error message if DocBin zlib decompress fails (#6394)

Raise a clearer error message when zlib decompression of `DocBin` data fails,
indicating that the data does not appear to be in `DocBin` (.spacy) format.
This commit is contained in:
Adriane Boyd 2020-11-27 07:39:49 +01:00 committed by GitHub
parent 165993d8e5
commit 26296ab223
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 8 additions and 1 deletions

View File

@ -712,6 +712,10 @@ class Errors:
E1013 = ("Invalid morph: the MorphAnalysis must have the same vocab as the "
"token itself. To set the morph from this MorphAnalysis, set from "
"the string value with: `token.set_morph(str(other_morph))`.")
E1014 = ("Error loading DocBin data. It doesn't look like the data is in "
"DocBin (.spacy) format. If your data is in spaCy v2's JSON "
"training format, convert it using `python -m spacy convert "
"file.json .`.")
# Deprecated model shortcuts, only used in errors and warnings

View File

@ -198,7 +198,10 @@ class DocBin:
DOCS: https://nightly.spacy.io/api/docbin#from_bytes
"""
msg = srsly.msgpack_loads(zlib.decompress(bytes_data))
try:
msg = srsly.msgpack_loads(zlib.decompress(bytes_data))
except zlib.error:
raise ValueError(Errors.E1014)
self.attrs = msg["attrs"]
self.strings = set(msg["strings"])
lengths = numpy.frombuffer(msg["lengths"], dtype="int32")