From d1b27fe5aa2e098423c0a9443bf9159bc66c3e3d Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Wed, 16 May 2018 00:31:52 +0200 Subject: [PATCH] Revert "Improve dynamic oracle when values are missing in parse" This reverts commit f56bd4736b3909922bdc844618a27ea2b6d97b1d. --- spacy/syntax/arc_eager.pyx | 41 +++++++++++++++----------------------- 1 file changed, 16 insertions(+), 25 deletions(-) diff --git a/spacy/syntax/arc_eager.pyx b/spacy/syntax/arc_eager.pyx index cc6d8f6f9..2dd269a53 100644 --- a/spacy/syntax/arc_eager.pyx +++ b/spacy/syntax/arc_eager.pyx @@ -58,9 +58,9 @@ cdef weight_t push_cost(StateClass stcls, const GoldParseC* gold, int target) no cdef int i, S_i for i in range(stcls.stack_depth()): S_i = stcls.S(i) - if gold.has_dep[target] and gold.heads[target] == S_i: + if gold.heads[target] == S_i: cost += 1 - if gold.has_dep[S_i] and gold.heads[S_i] == target and (NON_MONOTONIC or not stcls.has_head(S_i)): + if gold.heads[S_i] == target and (NON_MONOTONIC or not stcls.has_head(S_i)): cost += 1 if BINARY_COSTS and cost >= 1: return cost @@ -73,12 +73,10 @@ cdef weight_t pop_cost(StateClass stcls, const GoldParseC* gold, int target) nog cdef int i, B_i for i in range(stcls.buffer_length()): B_i = stcls.B(i) - if gold.has_dep[B_i]: - cost += gold.heads[B_i] == target - if gold.heads[B_i] == B_i or gold.heads[B_i] < target: - break - if gold.has_dep[target]: - cost += gold.heads[target] == B_i + cost += gold.heads[B_i] == target + cost += gold.heads[target] == B_i + if gold.heads[B_i] == B_i or gold.heads[B_i] < target: + break if BINARY_COSTS and cost >= 1: return cost if Break.is_valid(stcls.c, 0) and Break.move_cost(stcls, gold) == 0: @@ -109,10 +107,7 @@ cdef bint arc_is_gold(const GoldParseC* gold, int head, int child) nogil: cdef bint label_is_gold(const GoldParseC* gold, int head, int child, attr_t label) nogil: if not gold.has_dep[child]: - if label == SUBTOK_LABEL: - return False - else: - return True + return True elif label == 0: return True elif gold.labels[child] == label: @@ -172,7 +167,7 @@ cdef class Reduce: # Decrement cost for the arcs e save for i in range(1, st.stack_depth()): S_i = st.S(i) - if gold.has_dep[st.S(0)] and gold.heads[st.S(0)] == S_i: + if gold.heads[st.S(0)] == S_i: cost -= 1 if gold.heads[S_i] == st.S(0): cost -= 1 @@ -213,10 +208,8 @@ cdef class LeftArc: # Account for deps we might lose between S0 and stack if not s.has_head(s.S(0)): for i in range(1, s.stack_depth()): - if gold.has_dep[s.S(i)]: - cost += gold.heads[s.S(i)] == s.S(0) - if gold.has_dep[s.S(0)]: - cost += gold.heads[s.S(0)] == s.S(i) + cost += gold.heads[s.S(i)] == s.S(0) + cost += gold.heads[s.S(0)] == s.S(i) return cost + pop_cost(s, gold, s.S(0)) + arc_cost(s, gold, s.B(0), s.S(0)) @staticmethod @@ -291,20 +284,18 @@ cdef class Break: S_i = s.S(i) for j in range(s.buffer_length()): B_i = s.B(j) - if gold.has_dep[S_i]: - cost += gold.heads[S_i] == B_i - if gold.has_dep[B_i]: - cost += gold.heads[B_i] == S_i + cost += gold.heads[S_i] == B_i + cost += gold.heads[B_i] == S_i if cost != 0: return cost # Check for sentence boundary --- if it's here, we can't have any deps # between stack and buffer, so rest of action is irrelevant. - if not gold.has_dep[s.S(0)] or not gold.has_dep[s.B(0)]: + s0_root = _get_root(s.S(0), gold) + b0_root = _get_root(s.B(0), gold) + if s0_root != b0_root or s0_root == -1 or b0_root == -1: return cost - if gold.sent_start[s.B_(0).l_edge] == -1: - return cost+1 else: - return cost + return cost + 1 @staticmethod cdef inline weight_t label_cost(StateClass s, const GoldParseC* gold, attr_t label) nogil: