Better errors for has_annotation and Matcher (#10830)

* Show input argument instead of None

* Catch invalid attr early

* Move error message from code to `errors.py`

* Update spacy/errors.py

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>

* Update spacy/errors.py

* Update E152 and E153

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
This commit is contained in:
kadarakos 2022-05-25 11:12:29 +02:00 committed by GitHub
parent 83ed1f391b
commit f6a4b80c0b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 10 additions and 3 deletions

View File

@ -449,10 +449,10 @@ class Errors(metaclass=ErrorsWithCodes):
"same, but found '{nlp}' and '{vocab}' respectively.") "same, but found '{nlp}' and '{vocab}' respectively.")
E152 = ("The attribute {attr} is not supported for token patterns. " E152 = ("The attribute {attr} is not supported for token patterns. "
"Please use the option `validate=True` with the Matcher, PhraseMatcher, " "Please use the option `validate=True` with the Matcher, PhraseMatcher, "
"or EntityRuler for more details.") "EntityRuler or AttributeRuler for more details.")
E153 = ("The value type {vtype} is not supported for token patterns. " E153 = ("The value type {vtype} is not supported for token patterns. "
"Please use the option validate=True with Matcher, PhraseMatcher, " "Please use the option validate=True with Matcher, PhraseMatcher, "
"or EntityRuler for more details.") "EntityRuler or AttributeRuler for more details.")
E154 = ("One of the attributes or values is not supported for token " E154 = ("One of the attributes or values is not supported for token "
"patterns. Please use the option `validate=True` with the Matcher, " "patterns. Please use the option `validate=True` with the Matcher, "
"PhraseMatcher, or EntityRuler for more details.") "PhraseMatcher, or EntityRuler for more details.")
@ -918,6 +918,7 @@ class Errors(metaclass=ErrorsWithCodes):
E1034 = ("Node index {i} out of bounds ({length})") E1034 = ("Node index {i} out of bounds ({length})")
E1035 = ("Token index {i} out of bounds ({length})") E1035 = ("Token index {i} out of bounds ({length})")
E1036 = ("Cannot index into NoneNode") E1036 = ("Cannot index into NoneNode")
E1037 = ("Invalid attribute value '{attr}'.")
# Deprecated model shortcuts, only used in errors and warnings # Deprecated model shortcuts, only used in errors and warnings

View File

@ -786,6 +786,7 @@ def _preprocess_pattern(token_specs, vocab, extensions_table, extra_predicates):
def _get_attr_values(spec, string_store): def _get_attr_values(spec, string_store):
attr_values = [] attr_values = []
for attr, value in spec.items(): for attr, value in spec.items():
input_attr = attr
if isinstance(attr, str): if isinstance(attr, str):
attr = attr.upper() attr = attr.upper()
if attr == '_': if attr == '_':
@ -814,7 +815,7 @@ def _get_attr_values(spec, string_store):
attr_values.append((attr, value)) attr_values.append((attr, value))
else: else:
# should be caught in validation # should be caught in validation
raise ValueError(Errors.E152.format(attr=attr)) raise ValueError(Errors.E152.format(attr=input_attr))
return attr_values return attr_values

View File

@ -414,6 +414,7 @@ cdef class Doc:
""" """
# empty docs are always annotated # empty docs are always annotated
input_attr = attr
if self.length == 0: if self.length == 0:
return True return True
cdef int i cdef int i
@ -423,6 +424,10 @@ cdef class Doc:
elif attr == "IS_SENT_END" or attr == self.vocab.strings["IS_SENT_END"]: elif attr == "IS_SENT_END" or attr == self.vocab.strings["IS_SENT_END"]:
attr = SENT_START attr = SENT_START
attr = intify_attr(attr) attr = intify_attr(attr)
if attr is None:
raise ValueError(
Errors.E1037.format(attr=input_attr)
)
# adjust attributes # adjust attributes
if attr == HEAD: if attr == HEAD:
# HEAD does not have an unset state, so rely on DEP # HEAD does not have an unset state, so rely on DEP