diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index cfc2686a2..cc7be7e74 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -435,7 +435,12 @@ cdef class Doc: for i in range(self.length): token = &self.c[i] if token.ent_iob == 1: - assert start != -1 + if start == -1: + seq = ['%s|%s' % (t.text, t.ent_iob_) for t in self[i-5:i+5]] + raise ValueError( + "token.ent_iob values make invalid sequence: " + "I without B\n" + "{seq}".format(seq=' '.join(seq))) elif token.ent_iob == 2 or token.ent_iob == 0: if start != -1: output.append(Span(self, start, i, label=label))