diff --git a/spacy/errors.py b/spacy/errors.py index de4ffde3c..27091810d 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -690,8 +690,8 @@ class Errors: "in more than one span in entities, blocked, missing or outside.") E1011 = ("Unsupported default '{default}' in doc.set_ents. Available " "options: {modes}") - E1012 = ("Spans provided to doc.set_ents must be provided as a list of " - "`Span` objects.") + E1012 = ("Entity spans and blocked/missing/outside spans should be " + "provided to doc.set_ents as lists of `Span` objects.") E1013 = ("Unable to set entity for span with empty label. Entity spans are " "required to have a label. To set entity information as missing " "or blocked, use the keyword arguments with doc.set_ents.") diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index 34742e587..4bf6f0e5e 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -8,6 +8,7 @@ from libc.stdint cimport int32_t, uint64_t import copy from collections import Counter from enum import Enum +import itertools import numpy import srsly from thinc.api import get_array_module @@ -742,28 +743,7 @@ cdef class Doc: # Find all tokens covered by spans and check that none are overlapping seen_tokens = set() - for span in entities: - if not isinstance(span, Span): - raise ValueError(Errors.E1012.format(span=span)) - for i in range(span.start, span.end): - if i in seen_tokens: - raise ValueError(Errors.E1010.format(i=i)) - seen_tokens.add(i) - for span in blocked: - if not isinstance(span, Span): - raise ValueError(Errors.E1012.format(span=span)) - for i in range(span.start, span.end): - if i in seen_tokens: - raise ValueError(Errors.E1010.format(i=i)) - seen_tokens.add(i) - for span in missing: - if not isinstance(span, Span): - raise ValueError(Errors.E1012.format(span=span)) - for i in range(span.start, span.end): - if i in seen_tokens: - raise ValueError(Errors.E1010.format(i=i)) - seen_tokens.add(i) - for span in outside: + for span in itertools.chain.from_iterable([entities, blocked, missing, outside]): if not isinstance(span, Span): raise ValueError(Errors.E1012.format(span=span)) for i in range(span.start, span.end):