Improve error message when reading vectors

This commit is contained in:
Matthew Honnibal 2018-04-10 21:26:37 +02:00
parent db50ac524e
commit 0ddb152be0
2 changed files with 3 additions and 3 deletions

View File

@@ -14,7 +14,7 @@ import zipfile
from ._messages import Messages from ._messages import Messages
from ..vectors import Vectors from ..vectors import Vectors
from ..errors import Warnings, user_warning from ..errors import Errors, Warnings, user_warning
from ..util import prints, ensure_path, get_lang_class from ..util import prints, ensure_path, get_lang_class
try: try:
@@ -114,8 +114,7 @@ def read_vectors(vectors_loc):
pieces = line.rsplit(' ', vectors_data.shape[1]+1) pieces = line.rsplit(' ', vectors_data.shape[1]+1)
word = pieces.pop(0) word = pieces.pop(0)
if len(pieces) != vectors_data.shape[1]: if len(pieces) != vectors_data.shape[1]:
print(word, repr(line)) raise ValueError(Errors.E094.format(line_num=i, loc=vectors_loc)
raise ValueError("Bad line in file")
vectors_data[i] = numpy.asarray(pieces, dtype='f') vectors_data[i] = numpy.asarray(pieces, dtype='f')
vectors_keys.append(word) vectors_keys.append(word)
return vectors_data, vectors_keys return vectors_data, vectors_keys

View File

@@ -244,6 +244,7 @@ class Errors(object):
"Alternatively, it is built from the 'lang' and 'name' keys in " "Alternatively, it is built from the 'lang' and 'name' keys in "
"the meta.json. Vector names are required to avoid issue #1660.") "the meta.json. Vector names are required to avoid issue #1660.")
E093 = ("token.ent_iob values make invalid sequence: I without B\n{seq}") E093 = ("token.ent_iob values make invalid sequence: I without B\n{seq}")
E094 = ("Error reading line {line_num} in vectors file {loc}.")
@add_codes @add_codes