From f6a4b80c0b76f93f1714b8f2e6f1cb87d16e49f9 Mon Sep 17 00:00:00 2001
From: kadarakos <kadar.akos@gmail.com>
Date: Wed, 25 May 2022 11:12:29 +0200
Subject: [PATCH] Better errors for has_annotation and Matcher (#10830)

* Show input argument instead of None

* Catch invalid attr early

* Move error message from code to errors.py

* Update spacy/errors.py

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>

* Update spacy/errors.py

* Update E152 and E153

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
---
 spacy/errors.py           | 5 +++--
 spacy/matcher/matcher.pyx | 3 ++-
 spacy/tokens/doc.pyx      | 5 +++++
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/spacy/errors.py b/spacy/errors.py
index 67458fb52..c82ffe882 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -449,10 +449,10 @@ class Errors(metaclass=ErrorsWithCodes):
             "same, but found '{nlp}' and '{vocab}' respectively.")
     E152 = ("The attribute {attr} is not supported for token patterns. "
             "Please use the option `validate=True` with the Matcher, PhraseMatcher, "
-            "or EntityRuler for more details.")
+            "EntityRuler or AttributeRuler for more details.")
     E153 = ("The value type {vtype} is not supported for token patterns. "
             "Please use the option validate=True with Matcher, PhraseMatcher, "
-            "or EntityRuler for more details.")
+            "EntityRuler or AttributeRuler for more details.")
     E154 = ("One of the attributes or values is not supported for token "
             "patterns. Please use the option `validate=True` with the Matcher, "
             "PhraseMatcher, or EntityRuler for more details.")
@@ -918,6 +918,7 @@ class Errors(metaclass=ErrorsWithCodes):
     E1034 = ("Node index {i} out of bounds ({length})")
     E1035 = ("Token index {i} out of bounds ({length})")
     E1036 = ("Cannot index into NoneNode")
+    E1037 = ("Invalid attribute value '{attr}'.")
 
 
 # Deprecated model shortcuts, only used in errors and warnings
diff --git a/spacy/matcher/matcher.pyx b/spacy/matcher/matcher.pyx
index e43583e30..981c5cdd2 100644
--- a/spacy/matcher/matcher.pyx
+++ b/spacy/matcher/matcher.pyx
@@ -786,6 +786,7 @@ def _preprocess_pattern(token_specs, vocab, extensions_table, extra_predicates):
 def _get_attr_values(spec, string_store):
     attr_values = []
     for attr, value in spec.items():
+        input_attr = attr
         if isinstance(attr, str):
             attr = attr.upper()
             if attr == '_':
@@ -814,7 +815,7 @@ def _get_attr_values(spec, string_store):
             attr_values.append((attr, value))
         else:
             # should be caught in validation
-            raise ValueError(Errors.E152.format(attr=attr))
+            raise ValueError(Errors.E152.format(attr=input_attr))
     return attr_values
 
 
diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx
index c36e3a02f..d25247b13 100644
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@@ -414,6 +414,7 @@ cdef class Doc:
         """
 
         # empty docs are always annotated
+        input_attr = attr
         if self.length == 0:
             return True
         cdef int i
@@ -423,6 +424,10 @@ cdef class Doc:
         elif attr == "IS_SENT_END" or attr == self.vocab.strings["IS_SENT_END"]:
             attr = SENT_START
         attr = intify_attr(attr)
+        if attr is None:
+            raise ValueError(
+                Errors.E1037.format(attr=input_attr)
+            )
         # adjust attributes
         if attr == HEAD:
             # HEAD does not have an unset state, so rely on DEP