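* Hackishly support broken NER annotations: when an 'O' tag arrives while an entity is still open, drop the open span instead of asserting. The malformed input itself should be fixed at the source.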

This commit is contained in:
Matthew Honnibal 2015-05-27 19:14:31 +02:00
parent a7cee46fe9
commit d25d31442d

View File

@ -80,11 +80,15 @@ def tags_to_entities(tags):
entities = []
start = None
for i, tag in enumerate(tags):
if tag.startswith('O') or tag == '-':
assert not start
if tag.startswith('O'):
# TODO: We shouldn't be getting these malformed inputs. Fix this.
if start is not None:
start = None
continue
elif tag == '-':
continue
elif tag.startswith('I'):
assert start is not None, tags
assert start is not None, tags[:i]
continue
if tag.startswith('U'):
entities.append((tag[2:], i, i))