Fix error numbers: define the link and ent_iob errors as E980-E982 and update the call sites that raise them

This commit is contained in:
svlandeg 2020-06-15 08:51:31 +02:00
parent 3c0fc10dc4
commit a48553c1ed
3 changed files with 11 additions and 15 deletions

View File

@ -580,10 +580,14 @@ class Errors(object):
"table, which contains {n_rows} vectors.") "table, which contains {n_rows} vectors.")
# TODO: fix numbering after merging develop into master # TODO: fix numbering after merging develop into master
E980 = ("Each link annotation should refer to a dictionary with at most one "
"identifier mapping to 1.0, and all others to 0.0.")
E981 = ("The offsets of the annotations for 'links' need to refer exactly "
"to the offsets of the 'entities' annotations.")
E982 = ("The 'ent_iob' attribute of a Token should be an integer indexing "
"into {values}, but found {value}.")
E983 = ("Invalid key for '{dict_name}': {key}. Available keys: " E983 = ("Invalid key for '{dict_name}': {key}. Available keys: "
"{keys}") "{keys}")
E984 = ("Could not parse the {input} - double check the data is written "
"in the correct format as expected by spaCy.")
E985 = ("The pipeline component '{component}' is already available in the base " E985 = ("The pipeline component '{component}' is already available in the base "
"model. The settings in the component block in the config file are " "model. The settings in the component block in the config file are "
"being ignored. If you want to replace this component instead, set " "being ignored. If you want to replace this component instead, set "
@ -620,14 +624,6 @@ class Errors(object):
E999 = ("Encountered an unexpected format for the dictionary holding " E999 = ("Encountered an unexpected format for the dictionary holding "
"gold annotations: {gold_dict}") "gold annotations: {gold_dict}")
# TODO: These were left over after a merge, but I couldn't find them?
#E983 = ("Each link annotation should refer to a dictionary with at most one "
# "identifier mapping to 1.0, and all others to 0.0.")
#E984 = ("The offsets of the annotations for 'links' need to refer exactly "
# "to the offsets of the 'entities' annotations.")
#E985 = ("The 'ent_iob' attribute of a Token should be an integer indexing "
# "into {values}, but found {value}.")
@add_codes @add_codes
class TempErrors(object): class TempErrors(object):

View File

@ -155,7 +155,7 @@ def _annot2array(vocab, tok_annot, doc_annot):
elif key == "links": elif key == "links":
entities = doc_annot.get("entities", {}) entities = doc_annot.get("entities", {})
if value and not entities: if value and not entities:
raise ValueError(Errors.E984) raise ValueError(Errors.E981)
ent_kb_ids = _parse_links(vocab, words, value, entities) ent_kb_ids = _parse_links(vocab, words, value, entities)
tok_annot["ENT_KB_ID"] = ent_kb_ids tok_annot["ENT_KB_ID"] = ent_kb_ids
elif key == "cats": elif key == "cats":
@ -183,7 +183,7 @@ def _annot2array(vocab, tok_annot, doc_annot):
try: try:
values.append([iob_strings.index(v) for v in value]) values.append([iob_strings.index(v) for v in value])
except ValueError: except ValueError:
raise ValueError(Errors.E985.format(values=iob_strings, value=values)) raise ValueError(Errors.E982.format(values=iob_strings, value=values))
else: else:
attrs.append(key) attrs.append(key)
values.append([vocab.strings.add(v) for v in value]) values.append([vocab.strings.add(v) for v in value])
@ -273,7 +273,7 @@ def _parse_links(vocab, words, links, entities):
for index, annot_dict in links.items(): for index, annot_dict in links.items():
start_char, end_char = index start_char, end_char = index
if (start_char, end_char) not in entity_map: if (start_char, end_char) not in entity_map:
raise ValueError(Errors.E984) raise ValueError(Errors.E981)
for index, annot_dict in links.items(): for index, annot_dict in links.items():
true_kb_ids = [] true_kb_ids = []
@ -281,7 +281,7 @@ def _parse_links(vocab, words, links, entities):
if value == 1.0: if value == 1.0:
true_kb_ids.append(key) true_kb_ids.append(key)
if len(true_kb_ids) > 1: if len(true_kb_ids) > 1:
raise ValueError(Errors.E983) raise ValueError(Errors.E980)
if len(true_kb_ids) == 1: if len(true_kb_ids) == 1:
start_char, end_char = index start_char, end_char = index

View File

@ -831,7 +831,7 @@ cdef class Doc:
col = attrs.index(ENT_IOB) col = attrs.index(ENT_IOB)
for i in range(length): for i in range(length):
if array[i, col] not in range(0, len(iob_strings)): if array[i, col] not in range(0, len(iob_strings)):
raise ValueError(Errors.E985.format(values=iob_strings, value=array[i, col])) raise ValueError(Errors.E982.format(values=iob_strings, value=array[i, col]))
# Now load the data # Now load the data
for i in range(length): for i in range(length):
token = &self.c[i] token = &self.c[i]