iob converter: add 'exception' for error 'too many values' (#3159)

* added contributor agreement

* Issue #3128: throw an exception on bad IOB/IOB2 formatting

* Update spacy/cli/converters/iob2json.py with ValueError

Co-Authored-By: gavrieltal <gtloria@protonmail.com>
This commit is contained in:
Gavriel Loria 2019-01-16 07:44:16 -05:00 committed by Ines Montani
parent e599ed9ef8
commit 9a5003d5c8

View File

@@ -35,9 +35,11 @@ def read_iob(raw_sents):
tokens = [re.split('[^\w\-]', line.strip())]
if len(tokens[0]) == 3:
words, pos, iob = zip(*tokens)
else:
elif len(tokens[0]) == 2:
words, iob = zip(*tokens)
pos = ['-'] * len(words)
else:
raise ValueError('The iob/iob2 file is not formatted correctly. Try checking whitespace and delimiters.')
biluo = iob_to_biluo(iob)
sentences.append([
{'orth': w, 'tag': p, 'ner': ent}