From 85f2b04c4583160da5b254d45d97a9b5fbd099d1 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Wed, 30 Oct 2019 17:19:36 +0100 Subject: [PATCH] Support span._. in component decorator attrs (#4555) * Support span._. in component decorator attrs * Adjust error [ci skip] --- spacy/analysis.py | 13 ++++++++----- spacy/errors.py | 3 ++- spacy/tests/pipeline/test_analysis.py | 3 ++- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/spacy/analysis.py b/spacy/analysis.py index 49f67fd83..761be3de9 100644 --- a/spacy/analysis.py +++ b/spacy/analysis.py @@ -4,7 +4,7 @@ from __future__ import unicode_literals from collections import OrderedDict from wasabi import Printer -from .tokens import Doc, Token +from .tokens import Doc, Token, Span from .errors import Errors, Warnings, user_warning @@ -78,12 +78,15 @@ def validate_attrs(values): RETURNS (iterable): The checked attributes. """ data = dot_to_dict(values) - objs = {"doc": Doc, "token": Token} + objs = {"doc": Doc, "token": Token, "span": Span} for obj_key, attrs in data.items(): - if obj_key not in objs: # first element is not doc/token - if obj_key == "span": - span_attrs = [attr for attr in values if attr.startswith("span.")] + if obj_key == "span": + # Support Span only for custom extension attributes + span_attrs = [attr for attr in values if attr.startswith("span.")] + span_attrs = [attr for attr in span_attrs if not attr.startswith("span._.")] + if span_attrs: raise ValueError(Errors.E180.format(attrs=", ".join(span_attrs))) + if obj_key not in objs: # first element is not doc/token/span invalid_attrs = ", ".join(a for a in values if a.startswith(obj_key)) raise ValueError(Errors.E181.format(obj=obj_key, attrs=invalid_attrs)) if not isinstance(attrs, dict): # attr is something like "doc" diff --git a/spacy/errors.py b/spacy/errors.py index 7fe3a8a2d..c708f0a5b 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -515,7 +515,8 @@ class Errors(object): "in a list. For example: matcher.add('{key}', [doc])") E180 = ("Span attributes can't be declared as required or assigned by " "components, since spans are only views of the Doc. Use Doc and " - "Token attributes only and remove the following: {attrs}") + "Token attributes (or custom extension attributes) only and remove " + "the following: {attrs}") E181 = ("Received invalid attributes for unkown object {obj}: {attrs}. " "Only Doc and Token attributes are supported.") E182 = ("Received invalid attribute declaration: {attr}\nDid you forget " diff --git a/spacy/tests/pipeline/test_analysis.py b/spacy/tests/pipeline/test_analysis.py index 6e0354b18..6f8b93a6e 100644 --- a/spacy/tests/pipeline/test_analysis.py +++ b/spacy/tests/pipeline/test_analysis.py @@ -121,7 +121,7 @@ def test_component_factories_from_nlp(): def test_analysis_validate_attrs_valid(): - attrs = ["doc.sents", "doc.ents", "token.tag", "token._.xyz"] + attrs = ["doc.sents", "doc.ents", "token.tag", "token._.xyz", "span._.xyz"] assert validate_attrs(attrs) for attr in attrs: assert validate_attrs([attr]) @@ -139,6 +139,7 @@ def test_analysis_validate_attrs_valid(): "token.tag_", "token.tag.xyz", "token._.xyz.abc", + "span.label", ], ) def test_analysis_validate_attrs_invalid(attr):