fix error numbers

2025-11-07 03:17:37 +03:00 · 2020-06-15 08:51:31 +02:00 · 2020-06-15 08:51:31 +02:00 · a48553c1ed
commit a48553c1ed
parent 3c0fc10dc4
3 changed files with 11 additions and 15 deletions
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -580,10 +580,14 @@ class Errors(object):
            "table, which contains {n_rows} vectors.")
    # TODO: fix numbering after merging develop into master
    E980 = ("Each link annotation should refer to a dictionary with at most one "
            "identifier mapping to 1.0, and all others to 0.0.")
    E981 = ("The offsets of the annotations for 'links' need to refer exactly "
            "to the offsets of the 'entities' annotations.")
    E982 = ("The 'ent_iob' attribute of a Token should be an integer indexing "
            "into {values}, but found {value}.")
    E983 = ("Invalid key for '{dict_name}': {key}. Available keys: "
            "{keys}")
    E984 = ("Could not parse the {input} - double check the data is written "
            "in the correct format as expected by spaCy.")
    E985 = ("The pipeline component '{component}' is already available in the base "
            "model. The settings in the component block in the config file are "
            "being ignored. If you want to replace this component instead, set "
@@ -620,14 +624,6 @@ class Errors(object):
    E999 = ("Encountered an unexpected format for the dictionary holding "
            "gold annotations: {gold_dict}")
    # TODO: These were left over after a merge, but I couldn't find them?
    #E983 = ("Each link annotation should refer to a dictionary with at most one "
    #        "identifier mapping to 1.0, and all others to 0.0.")
    #E984 = ("The offsets of the annotations for 'links' need to refer exactly "
    #        "to the offsets of the 'entities' annotations.")
    #E985 = ("The 'ent_iob' attribute of a Token should be an integer indexing "
    #        "into {values}, but found {value}.")
@add_codes
 class TempErrors(object):
--- a/spacy/gold/example.pyx
+++ b/spacy/gold/example.pyx
@@ -155,7 +155,7 @@ def _annot2array(vocab, tok_annot, doc_annot):
        elif key == "links":
            entities = doc_annot.get("entities", {})
            if value and not entities:
-                raise ValueError(Errors.E984)
+                raise ValueError(Errors.E981)
            ent_kb_ids = _parse_links(vocab, words, value, entities)
            tok_annot["ENT_KB_ID"] = ent_kb_ids
        elif key == "cats":
@@ -183,7 +183,7 @@ def _annot2array(vocab, tok_annot, doc_annot):
            try:
                values.append([iob_strings.index(v) for v in value])
            except ValueError:
-                raise ValueError(Errors.E985.format(values=iob_strings, value=values))
+                raise ValueError(Errors.E982.format(values=iob_strings, value=values))
        else:
            attrs.append(key)
            values.append([vocab.strings.add(v) for v in value])
@@ -273,7 +273,7 @@ def _parse_links(vocab, words, links, entities):
    for index, annot_dict in links.items():
        start_char, end_char = index
        if (start_char, end_char) not in entity_map:
-            raise ValueError(Errors.E984)
+            raise ValueError(Errors.E981)
    for index, annot_dict in links.items():
        true_kb_ids = []
@@ -281,7 +281,7 @@ def _parse_links(vocab, words, links, entities):
            if value == 1.0:
                true_kb_ids.append(key)
        if len(true_kb_ids) > 1:
-            raise ValueError(Errors.E983)
+            raise ValueError(Errors.E980)
        if len(true_kb_ids) == 1:
            start_char, end_char = index
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@@ -831,7 +831,7 @@ cdef class Doc:
            col = attrs.index(ENT_IOB)
            for i in range(length):
                if array[i, col] not in range(0, len(iob_strings)):
-                    raise ValueError(Errors.E985.format(values=iob_strings, value=array[i, col]))
+                    raise ValueError(Errors.E982.format(values=iob_strings, value=array[i, col]))
        # Now load the data
        for i in range(length):
            token = &self.c[i]