Fix SENT_START handling in NewExample

This commit is contained in:
Matthew Honnibal 2020-06-09 23:58:16 +02:00
parent ad547a4b8f
commit 337d2b5ad6

View File

@@ -94,13 +94,16 @@ def _annot2array(strings, tok_annot, doc_annot):
elif key == "HEAD": elif key == "HEAD":
attrs.append(key) attrs.append(key)
values.append([h-i for i, h in enumerate(value)]) values.append([h-i for i, h in enumerate(value)])
elif key == "SENT_START":
attrs.append(key)
values.append(value)
else: else:
attrs.append(key) attrs.append(key)
values.append([strings.add(v) for v in value]) values.append([strings.add(v) for v in value])
# TODO: Calculate token.ent_kb_id from doc_annot["links"]. # TODO: Calculate token.ent_kb_id from doc_annot["links"].
# We need to fix this and the doc.ents thing, both should be doc # We need to fix this and the doc.ents thing, both should be doc
# annotations. # annotations.
array = numpy.array(values, dtype="uint64") array = numpy.asarray(values, dtype="uint64")
return attrs, array.T return attrs, array.T