def get_aligned_parse(self, projectivize=True):
    """Project the gold dependency parse onto the candidate tokenization.

    Heads and labels are read from the gold tokens (``self.y``) and mapped
    onto the candidate tokens (``self.x``) through ``self.alignment``.

    projectivize (bool): If True (the default), pass the gold heads and
        labels through ``nonproj.projectivize`` before aligning them. If
        False, the gold heads and labels are aligned exactly as stored.
    RETURNS (tuple): ``(aligned_heads, aligned_deps)``, two lists with one
        entry per candidate token. ``aligned_heads[i]`` is the candidate
        index of token i's head and ``aligned_deps[i]`` is its dependency
        label. Both entries are None when either the token itself or its
        gold head has no counterpart in the alignment.
    """
    cand_to_gold = self.alignment.cand_to_gold
    gold_to_cand = self.alignment.gold_to_cand
    n_cand = self.x.length
    aligned_heads = [None] * n_cand
    aligned_deps = [None] * n_cand
    heads = [token.head.i for token in self.y]
    deps = [token.dep_ for token in self.y]
    if projectivize:
        # Honour the flag: previously the parse was projectivized
        # unconditionally and ``projectivize=False`` was silently ignored.
        # ``nonproj`` is the module imported at file level from ``..syntax``.
        heads, deps = nonproj.projectivize(heads, deps)
    for cand_i in range(n_cand):
        gold_i = cand_to_gold[cand_i]
        if gold_i is None:
            # No gold token aligned to this candidate token.
            continue
        gold_head = gold_to_cand[heads[gold_i]]
        if gold_head is None:
            # The gold head has no candidate counterpart, so the arc
            # cannot be expressed in the candidate tokenization.
            continue
        # Head and label are only ever set together, so a token never
        # carries a label without a head.
        aligned_heads[cand_i] = gold_head
        aligned_deps[cand_i] = deps[gold_i]
    return aligned_heads, aligned_deps