Better errors for has_annotation and Matcher (#10830)

* Show input argument instead of None
* catch invalid attr early
* moved error message from code to errors.py
* Update spacy/errors.py

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>

* Update spacy/errors.py
* update E153 and E154

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
2025-12-23 18:13:13 +03:00 · 2022-05-25 11:12:29 +02:00 · 2022-05-25 11:12:29 +02:00 · f6a4b80c0b
commit f6a4b80c0b
parent 83ed1f391b
3 changed files with 10 additions and 3 deletions
--- a/spacy/errors.py
+++ b/spacy/errors.py
@ -449,10 +449,10 @@ class Errors(metaclass=ErrorsWithCodes):
            "same, but found '{nlp}' and '{vocab}' respectively.")
    E152 = ("The attribute {attr} is not supported for token patterns. "
            "Please use the option `validate=True` with the Matcher, PhraseMatcher, "
-            "or EntityRuler for more details.")
+            "EntityRuler or AttributeRuler for more details.")
    E153 = ("The value type {vtype} is not supported for token patterns. "
            "Please use the option validate=True with Matcher, PhraseMatcher, "
-            "or EntityRuler for more details.")
+            "EntityRuler or AttributeRuler for more details.")
    E154 = ("One of the attributes or values is not supported for token "
            "patterns. Please use the option `validate=True` with the Matcher, "
            "PhraseMatcher, or EntityRuler for more details.")
@ -918,6 +918,7 @@ class Errors(metaclass=ErrorsWithCodes):
    E1034 = ("Node index {i} out of bounds ({length})")
    E1035 = ("Token index {i} out of bounds ({length})")
    E1036 = ("Cannot index into NoneNode")
    E1037 = ("Invalid attribute value '{attr}'.")
 # Deprecated model shortcuts, only used in errors and warnings
--- a/spacy/matcher/matcher.pyx
+++ b/spacy/matcher/matcher.pyx
@ -786,6 +786,7 @@ def _preprocess_pattern(token_specs, vocab, extensions_table, extra_predicates):
 def _get_attr_values(spec, string_store):
    attr_values = []
    for attr, value in spec.items():
        input_attr = attr
        if isinstance(attr, str):
            attr = attr.upper()
            if attr == '_':
@ -814,7 +815,7 @@ def _get_attr_values(spec, string_store):
            attr_values.append((attr, value))
        else:
            # should be caught in validation
-            raise ValueError(Errors.E152.format(attr=attr))
+            raise ValueError(Errors.E152.format(attr=input_attr))
    return attr_values
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@ -414,6 +414,7 @@ cdef class Doc:
        """
        # empty docs are always annotated
        input_attr = attr
        if self.length == 0:
            return True
        cdef int i
@ -423,6 +424,10 @@ cdef class Doc:
        elif attr == "IS_SENT_END" or attr == self.vocab.strings["IS_SENT_END"]:
            attr = SENT_START
        attr = intify_attr(attr)
        if attr is None:
            raise ValueError(
                Errors.E1037.format(attr=input_attr)
            )
        # adjust attributes
        if attr == HEAD:
            # HEAD does not have an unset state, so rely on DEP