* Add LookupError for better error reporting in Vocab

This commit is contained in:
Matthew Honnibal 2015-10-06 10:34:59 +11:00
parent ecc5281b36
commit 3d9f41c2c9

View File

@ -96,7 +96,9 @@ cdef class Vocab:
lex = <LexemeC*>self._by_hash.get(key) lex = <LexemeC*>self._by_hash.get(key)
cdef size_t addr cdef size_t addr
if lex != NULL: if lex != NULL:
assert lex.orth == self.strings[string] if lex.orth != self.strings[string]:
raise LookupError.mismatched_strings(
lex.orth, self.strings[lex.orth], string)
return lex return lex
else: else:
return self._new_lexeme(mem, string) return self._new_lexeme(mem, string)
@ -352,6 +354,21 @@ def write_binary_vectors(in_loc, out_loc):
out_file.write_from(vec, vec_len, sizeof(float)) out_file.write_from(vec, vec_len, sizeof(float))
class LookupError(Exception):
    """Error raised when a lexeme fetched from the Vocab is inconsistent.

    NOTE(review): this name shadows the builtin ``LookupError`` inside this
    module and does not derive from it. The name is kept as-is so existing
    references to it keep working.
    """

    @classmethod
    def mismatched_strings(cls, id_, id_string, original_string):
        """Build the error for a lexeme whose orth ID disagrees with the query.

        Args:
            id_: The orth ID found on the returned lexeme.
            id_string: The string associated with ``id_``.
            original_string: The string that was being looked up.

        Returns:
            A new instance of ``cls`` carrying a multi-line diagnostic
            message. The instance is returned, not raised.
        """
        # The two strings are rendered with !r so that empty, whitespace-only
        # or otherwise non-printing values stay visible in the message.
        return cls(
            "Error fetching a Lexeme from the Vocab. When looking up a string, "
            "the lexeme returned had an orth ID that did not match the query string. "
            "This means that the cached lexeme structs are mismatched to the "
            "string encoding table. The mismatched values:\n"
            "Query string: {query!r}\n"
            "Orth cached: {orth_str!r}\n"
            "ID of orth: {orth_id}".format(
                query=original_string, orth_str=id_string, orth_id=id_)
        )
class VectorReadError(Exception): class VectorReadError(Exception):
@classmethod @classmethod
def mismatched_sizes(cls, loc, line_num, prev_size, curr_size): def mismatched_sizes(cls, loc, line_num, prev_size, curr_size):