From 36d49a0f13e8a17185a8ee821738e57c55c3848d Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Tue, 9 Jun 2020 15:43:19 +0200 Subject: [PATCH] Fix NewExample class --- spacy/gold/new_example.pyx | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/spacy/gold/new_example.pyx b/spacy/gold/new_example.pyx index 7f081ffbd..3c42c0bb1 100644 --- a/spacy/gold/new_example.pyx +++ b/spacy/gold/new_example.pyx @@ -8,7 +8,7 @@ from .align import Alignment from ..errors import Errors, AlignmentError -cpdef Doc annotations2doc(Doc predicted, doc_annot, tok_annot): +cpdef Doc annotations2doc(Doc predicted, tok_annot, doc_annot): # TODO: Improve and test this words = tok_annot.get("ORTH", [tok.text for tok in predicted]) attrs, array = _annot2array(predicted.vocab.strings, tok_annot, doc_annot) @@ -83,16 +83,19 @@ def _annot2array(strings, tok_annot, doc_annot): for key, value in tok_annot.items(): if key not in IDS: raise ValueError(f"Unknown attr: {key}") - if key == "HEAD": + elif key == "ORTH": + pass + elif key == "HEAD": + attrs.append(key) values.append([h-i for i, h in enumerate(value)]) else: + attrs.append(key) values.append([strings.add(v) for v in value]) - attrs.append(key) # TODO: Calculate token.ent_kb_id from doc_annot["links"]. # We need to fix this and the doc.ents thing, both should be doc # annotations. array = numpy.array(values, dtype="uint64") - return attrs, array + return attrs, array.T def _parse_example_dict_data(example_dict):