mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-28 02:04:07 +03:00
* Add LookupError for better error reporting in Vocab
This commit is contained in:
parent
ecc5281b36
commit
3d9f41c2c9
|
@ -96,7 +96,9 @@ cdef class Vocab:
|
||||||
lex = <LexemeC*>self._by_hash.get(key)
|
lex = <LexemeC*>self._by_hash.get(key)
|
||||||
cdef size_t addr
|
cdef size_t addr
|
||||||
if lex != NULL:
|
if lex != NULL:
|
||||||
assert lex.orth == self.strings[string]
|
if lex.orth != self.strings[string]:
|
||||||
|
raise LookupError.mismatched_strings(
|
||||||
|
lex.orth, self.strings[lex.orth], string)
|
||||||
return lex
|
return lex
|
||||||
else:
|
else:
|
||||||
return self._new_lexeme(mem, string)
|
return self._new_lexeme(mem, string)
|
||||||
|
@ -352,6 +354,21 @@ def write_binary_vectors(in_loc, out_loc):
|
||||||
out_file.write_from(vec, vec_len, sizeof(float))
|
out_file.write_from(vec, vec_len, sizeof(float))
|
||||||
|
|
||||||
|
|
||||||
|
class LookupError(Exception):
    """Error raised when a cached lexeme does not match the queried string.

    NOTE: inside this module the name shadows Python's built-in
    ``LookupError``; this class derives directly from ``Exception``.
    """

    @classmethod
    def mismatched_strings(cls, id_, id_string, original_string):
        """Build the error for a lexeme whose orth ID disagrees with the query.

        id_ -- the orth ID stored on the lexeme that was returned.
        id_string -- the string that the orth ID maps to.
        original_string -- the string that was actually looked up.

        Returns a new instance of ``cls`` carrying the formatted message.
        """
        # The adjacent literals form ONE template; formatting is applied to
        # the whole concatenation, exactly as a trailing ``.format`` would.
        template = (
            "Error fetching a Lexeme from the Vocab. When looking up a string, "
            "the lexeme returned had an orth ID that did not match the query string. "
            "This means that the cached lexeme structs are mismatched to the "
            "string encoding table. The mismatched:\n"
            "Query string: {query}\n"
            "Orth cached: {orth_str}\n"
            "ID of orth: {orth_id}"
        )
        message = template.format(
            orth_id=id_,
            orth_str=id_string,
            query=original_string,
        )
        return cls(message)
|
||||||
|
|
||||||
|
|
||||||
class VectorReadError(Exception):
|
class VectorReadError(Exception):
|
||||||
@classmethod
|
@classmethod
|
||||||
def mismatched_sizes(cls, loc, line_num, prev_size, curr_size):
|
def mismatched_sizes(cls, loc, line_num, prev_size, curr_size):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user