mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 09:26:27 +03:00
iob converter: add 'exception' for error 'too many values' (#3159)
* added contributor agreement
* issue #3128 throw exception on bad IOB/2 formatting
* Update spacy/cli/converters/iob2json.py with ValueError

Co-Authored-By: gavrieltal <gtloria@protonmail.com>
This commit is contained in:
parent
e599ed9ef8
commit
9a5003d5c8
|
@ -35,9 +35,11 @@ def read_iob(raw_sents):
|
|||
tokens = [re.split('[^\w\-]', line.strip())]
|
||||
if len(tokens[0]) == 3:
|
||||
words, pos, iob = zip(*tokens)
|
||||
else:
|
||||
elif len(tokens[0]) == 2:
|
||||
words, iob = zip(*tokens)
|
||||
pos = ['-'] * len(words)
|
||||
else:
|
||||
raise ValueError('The iob/iob2 file is not formatted correctly. Try checking whitespace and delimiters.')
|
||||
biluo = iob_to_biluo(iob)
|
||||
sentences.append([
|
||||
{'orth': w, 'tag': p, 'ner': ent}
|
||||
|
|
Loading…
Reference in New Issue
Block a user