mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 10:16:27 +03:00
iob converter: add 'exception' for error 'too many values' (#3159)
* added contributor agreement
* issue #3128 throw exception on bad IOB/2 formatting
* Update spacy/cli/converters/iob2json.py with ValueError

Co-Authored-By: gavrieltal <gtloria@protonmail.com>
This commit is contained in:
parent
e599ed9ef8
commit
9a5003d5c8
|
@@ -35,9 +35,11 @@ def read_iob(raw_sents):
|
||||||
tokens = [re.split('[^\w\-]', line.strip())]
|
tokens = [re.split('[^\w\-]', line.strip())]
|
||||||
if len(tokens[0]) == 3:
|
if len(tokens[0]) == 3:
|
||||||
words, pos, iob = zip(*tokens)
|
words, pos, iob = zip(*tokens)
|
||||||
else:
|
elif len(tokens[0]) == 2:
|
||||||
words, iob = zip(*tokens)
|
words, iob = zip(*tokens)
|
||||||
pos = ['-'] * len(words)
|
pos = ['-'] * len(words)
|
||||||
|
else:
|
||||||
|
raise ValueError('The iob/iob2 file is not formatted correctly. Try checking whitespace and delimiters.')
|
||||||
biluo = iob_to_biluo(iob)
|
biluo = iob_to_biluo(iob)
|
||||||
sentences.append([
|
sentences.append([
|
||||||
{'orth': w, 'tag': p, 'ner': ent}
|
{'orth': w, 'tag': p, 'ner': ent}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user