From f6a4b80c0b76f93f1714b8f2e6f1cb87d16e49f9 Mon Sep 17 00:00:00 2001 From: kadarakos Date: Wed, 25 May 2022 11:12:29 +0200 Subject: [PATCH] Better errors for has_annotation and Matcher (#10830) * Show input argument instead of None * catch invalid attr early * moved error message from code to errors.py * Update spacy/errors.py Co-authored-by: Adriane Boyd * Update spacy/errors.py * update E153 and E154 Co-authored-by: Adriane Boyd --- spacy/errors.py | 5 +++-- spacy/matcher/matcher.pyx | 3 ++- spacy/tokens/doc.pyx | 5 +++++ 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/spacy/errors.py b/spacy/errors.py index 67458fb52..c82ffe882 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -449,10 +449,10 @@ class Errors(metaclass=ErrorsWithCodes): "same, but found '{nlp}' and '{vocab}' respectively.") E152 = ("The attribute {attr} is not supported for token patterns. " "Please use the option `validate=True` with the Matcher, PhraseMatcher, " - "or EntityRuler for more details.") + "EntityRuler or AttributeRuler for more details.") E153 = ("The value type {vtype} is not supported for token patterns. " "Please use the option validate=True with Matcher, PhraseMatcher, " - "or EntityRuler for more details.") + "EntityRuler or AttributeRuler for more details.") E154 = ("One of the attributes or values is not supported for token " "patterns. Please use the option `validate=True` with the Matcher, " "PhraseMatcher, or EntityRuler for more details.") @@ -918,6 +918,7 @@ class Errors(metaclass=ErrorsWithCodes): E1034 = ("Node index {i} out of bounds ({length})") E1035 = ("Token index {i} out of bounds ({length})") E1036 = ("Cannot index into NoneNode") + E1037 = ("Invalid attribute value '{attr}'.") # Deprecated model shortcuts, only used in errors and warnings diff --git a/spacy/matcher/matcher.pyx b/spacy/matcher/matcher.pyx index e43583e30..981c5cdd2 100644 --- a/spacy/matcher/matcher.pyx +++ b/spacy/matcher/matcher.pyx @@ -786,6 +786,7 @@ def _preprocess_pattern(token_specs, vocab, extensions_table, extra_predicates): def _get_attr_values(spec, string_store): attr_values = [] for attr, value in spec.items(): + input_attr = attr if isinstance(attr, str): attr = attr.upper() if attr == '_': @@ -814,7 +815,7 @@ def _get_attr_values(spec, string_store): attr_values.append((attr, value)) else: # should be caught in validation - raise ValueError(Errors.E152.format(attr=attr)) + raise ValueError(Errors.E152.format(attr=input_attr)) return attr_values diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index c36e3a02f..d25247b13 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -414,6 +414,7 @@ cdef class Doc: """ # empty docs are always annotated + input_attr = attr if self.length == 0: return True cdef int i @@ -423,6 +424,10 @@ cdef class Doc: elif attr == "IS_SENT_END" or attr == self.vocab.strings["IS_SENT_END"]: attr = SENT_START attr = intify_attr(attr) + if attr is None: + raise ValueError( + Errors.E1037.format(attr=input_attr) + ) # adjust attributes if attr == HEAD: # HEAD does not have an unset state, so rely on DEP