diff --git a/spacy/errors.py b/spacy/errors.py index c897c29ff..9fd41d425 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -356,7 +356,7 @@ class Errors(metaclass=ErrorsWithCodes): "match.") E083 = ("Error setting extension: only one of `default`, `method`, or " "`getter` (plus optional `setter`) is allowed. Got: {nr_defined}") - E084 = ("Error assigning label ID {label} to span: not in StringStore.") + E084 = ("Error assigning {name} ID '{value}' to span: not in StringStore.") E085 = ("Can't create lexeme for string '{string}'.") E087 = ("Unknown displaCy style: {style}.") E088 = ("Text of length {length} exceeds maximum of {max_length}. The " diff --git a/spacy/tests/doc/test_span.py b/spacy/tests/doc/test_span.py index adef5922f..056c41bbd 100644 --- a/spacy/tests/doc/test_span.py +++ b/spacy/tests/doc/test_span.py @@ -456,6 +456,16 @@ def test_span_string_label_id(doc): assert span.id == doc.vocab.strings["Q342"] +def test_span_attrs_in_strings(doc): + unusual_label = "test_spans_attrs_in_strings_label" + with pytest.raises(ValueError): + span = Span(doc, 0, 1, label=doc.vocab.strings[unusual_label]) + with pytest.raises(ValueError): + span = Span(doc, 0, 1, kb_id=doc.vocab.strings[unusual_label]) + with pytest.raises(ValueError): + span = Span(doc, 0, 1, span_id=doc.vocab.strings[unusual_label]) + + def test_span_attrs_writable(doc): span = Span(doc, 0, 1) span.label_ = "label" diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx index 7750b16ed..c41bb1cbe 100644 --- a/spacy/tokens/span.pyx +++ b/spacy/tokens/span.pyx @@ -107,7 +107,11 @@ cdef class Span: if isinstance(span_id, str): span_id = doc.vocab.strings.add(span_id) if label not in doc.vocab.strings: - raise ValueError(Errors.E084.format(label=label)) + raise ValueError(Errors.E084.format(name="label", value=label)) + if kb_id not in doc.vocab.strings: + raise ValueError(Errors.E084.format(name="kb_id", value=kb_id)) + if span_id not in doc.vocab.strings: + raise ValueError(Errors.E084.format(name="span_id", value=span_id)) start_char = doc[start].idx if start < doc.length else len(doc.text) if start == end: