From 78487f3e6655060f6c4dab4ab110d57de89db0f5 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Sun, 24 May 2015 20:05:58 +0200 Subject: [PATCH] * Update parser oracle for missing heads --- spacy/syntax/arc_eager.pyx | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/spacy/syntax/arc_eager.pyx b/spacy/syntax/arc_eager.pyx index 67e9fb2e7..cb0918606 100644 --- a/spacy/syntax/arc_eager.pyx +++ b/spacy/syntax/arc_eager.pyx @@ -69,7 +69,7 @@ cdef class ArcEager(TransitionSystem): for i in range(gold.length): if gold.heads[i] is None: # Missing values gold.c_heads[i] = i - gold.c_labels[i] = self.strings[''] + gold.c_labels[i] = -1 else: gold.c_heads[i] = gold.heads[i] gold.c_labels[i] = self.strings[gold.labels[i]] @@ -252,7 +252,9 @@ cdef int _right_cost(const Transition* self, const State* s, GoldParse gold) exc if gold.c_heads[s.i] == s.stack[0]: cost += self.label != gold.c_labels[s.i] return cost - cost += head_in_buffer(s, s.i, gold.c_heads) + # This indicates missing head + if gold.c_labels[s.i] != -1: + cost += head_in_buffer(s, s.i, gold.c_heads) cost += children_in_stack(s, s.i, gold.c_heads) cost += head_in_stack(s, s.i, gold.c_heads) if NON_MONOTONIC: @@ -270,16 +272,18 @@ cdef int _left_cost(const Transition* self, const State* s, GoldParse gold) exce # If we're at EOL, then the left arc will add an arc to ROOT. elif at_eol(s): # Are we root? - cost += gold.c_heads[s.stack[0]] != s.stack[0] - # Are we labelling correctly? - cost += self.label != gold.c_labels[s.stack[0]] + if gold.c_labels[s.stack[0]] != -1: + cost += gold.c_heads[s.stack[0]] != s.stack[0] + # Are we labelling correctly? + cost += self.label != gold.c_labels[s.stack[0]] return cost cost += head_in_buffer(s, s.stack[0], gold.c_heads) cost += children_in_buffer(s, s.stack[0], gold.c_heads) if NON_MONOTONIC and s.stack_len >= 2: cost += gold.c_heads[s.stack[0]] == s.stack[-1] - cost += gold.c_heads[s.stack[0]] == s.stack[0] + if gold.c_labels[s.stack[0]] != -1: + cost += gold.c_heads[s.stack[0]] == s.stack[0] return cost