Improve error message when entity sequence is inconsistent

This commit is contained in:
Matthew Honnibal 2018-03-26 07:13:34 +02:00
parent cbd2794be0
commit 99fbc7db33

View File

@@ -421,7 +421,12 @@ cdef class Doc:
 for i in range(self.length):
     token = &self.c[i]
     if token.ent_iob == 1:
-        assert start != -1
+        if start == -1:
+            seq = ['%s|%s' % (t.text, t.ent_iob_) for t in self[i-5:i+5]]
+            raise ValueError(
+                "token.ent_iob values make invalid sequence: "
+                "I without B\n"
+                "{seq}".format(seq=' '.join(seq)))
     elif token.ent_iob == 2 or token.ent_iob == 0:
         if start != -1:
             output.append(Span(self, start, i, label=label))