Raise errors for missing strings in Span constructor

This commit is contained in:
Adriane Boyd 2023-03-17 12:22:23 +01:00
parent bf4005eb5e
commit de9727a98b
3 changed files with 16 additions and 2 deletions

View File

@ -356,7 +356,7 @@ class Errors(metaclass=ErrorsWithCodes):
"match.")
E083 = ("Error setting extension: only one of `default`, `method`, or "
"`getter` (plus optional `setter`) is allowed. Got: {nr_defined}")
E084 = ("Error assigning label ID {label} to span: not in StringStore.")
E084 = ("Error assigning {name} ID '{value}' to span: not in StringStore.")
E085 = ("Can't create lexeme for string '{string}'.")
E087 = ("Unknown displaCy style: {style}.")
E088 = ("Text of length {length} exceeds maximum of {max_length}. The "

View File

@ -456,6 +456,16 @@ def test_span_string_label_id(doc):
assert span.id == doc.vocab.strings["Q342"]
def test_span_attrs_in_strings(doc):
    """Check that Span() rejects attribute IDs missing from the StringStore.

    The constructor is expected to raise ValueError when `label`, `kb_id`
    or `span_id` is given as an ID that is not in `doc.vocab.strings`.
    """
    unusual_label = "test_spans_attrs_in_strings_label"
    # NOTE(review): presumably looking the string up yields its ID without
    # registering it in the store -- the test relies on that; confirm.
    missing_id = doc.vocab.strings[unusual_label]
    # The constructor call itself must raise, so its result is never bound.
    with pytest.raises(ValueError):
        Span(doc, 0, 1, label=missing_id)
    with pytest.raises(ValueError):
        Span(doc, 0, 1, kb_id=missing_id)
    with pytest.raises(ValueError):
        Span(doc, 0, 1, span_id=missing_id)
def test_span_attrs_writable(doc):
span = Span(doc, 0, 1)
span.label_ = "label"

View File

@ -107,7 +107,11 @@ cdef class Span:
if isinstance(span_id, str):
span_id = doc.vocab.strings.add(span_id)
if label not in doc.vocab.strings:
raise ValueError(Errors.E084.format(label=label))
raise ValueError(Errors.E084.format(name="label", value=label))
if kb_id not in doc.vocab.strings:
raise ValueError(Errors.E084.format(name="kb_id", value=kb_id))
if span_id not in doc.vocab.strings:
raise ValueError(Errors.E084.format(name="span_id", value=span_id))
start_char = doc[start].idx if start < doc.length else len(doc.text)
if start == end: