From 9a5003d5c8b03c9ae53f32d7b3fc987726cd5617 Mon Sep 17 00:00:00 2001 From: Gavriel Loria Date: Wed, 16 Jan 2019 07:44:16 -0500 Subject: [PATCH] iob converter: add 'exception' for error 'too many values' (#3159) * added contributor agreement * issue #3128 throw exception on bad IOB/2 formatting * Update spacy/cli/converters/iob2json.py with ValueError Co-Authored-By: gavrieltal --- spacy/cli/converters/iob2json.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/spacy/cli/converters/iob2json.py b/spacy/cli/converters/iob2json.py index 3f38a6e25..e159bfef9 100644 --- a/spacy/cli/converters/iob2json.py +++ b/spacy/cli/converters/iob2json.py @@ -35,9 +35,11 @@ def read_iob(raw_sents): tokens = [re.split('[^\w\-]', line.strip())] if len(tokens[0]) == 3: words, pos, iob = zip(*tokens) - else: + elif len(tokens[0]) == 2: words, iob = zip(*tokens) pos = ['-'] * len(words) + else: + raise ValueError('The iob/iob2 file is not formatted correctly. Try checking whitespace and delimiters.') biluo = iob_to_biluo(iob) sentences.append([ {'orth': w, 'tag': p, 'ner': ent}