* Refactor transition system to use classes with staticmethods.

2025-08-07 21:54:54 +03:00 · 2015-06-05 02:27:17 +02:00 · 2015-06-05 02:27:17 +02:00 · 6bf35cecc3
commit 6bf35cecc3
parent 36a34d544b
5 changed files with 521 additions and 462 deletions
--- a/spacy/syntax/arc_eager.pyx
+++ b/spacy/syntax/arc_eager.pyx
@ -46,6 +46,306 @@ MOVE_NAMES[CONSTITUENT] = 'C'
 MOVE_NAMES[ADJUST] = 'A'


+cdef class Shift:
+    # SHIFT move. The three static methods share the (state, label) calling
+    # shape used by every move class in this file, so they can be stored as
+    # function pointers on a Transition (is_valid / do / get_cost).
+    @staticmethod
+    cdef bint is_valid(const State* s, int label) except -1:
+        # Allowed whenever at_eol(s) is false; `label` is not consulted.
+        return not at_eol(s)
+
+    @staticmethod
+    cdef int transition(State* state, int label) except -1:
+        # Set the dep label, in case we need it after we reduce
+        if NON_MONOTONIC:
+            state.sent[state.i].dep = label
+        push_stack(state)
+
+    @staticmethod
+    cdef int cost(const State* s, const GoldParseC* gold, int label) except -1:
+        # 9000 is returned for a move that is not currently valid.
+        if not Shift.is_valid(s, label):
+            return 9000
+        # Declared as a C int, matching the accumulators in the other cost()
+        # implementations, instead of relying on type inference.
+        cdef int cost = 0
+        cost += head_in_stack(s, s.i, gold.heads)
+        cost += children_in_stack(s, s.i, gold.heads)
+        # If we can break, and there's no cost to doing so, we should
+        if Break.is_valid(s, label) and Break.cost(s, gold, -1) == 0:
+            cost += 1
+        return cost
+
+
+cdef class Reduce:
+    # REDUCE move: removes the top stack entry via pop_stack().
+    @staticmethod
+    cdef bint is_valid(const State* s, int label) except -1:
+        # Requires at least two stack entries. When NON_MONOTONIC is off, the
+        # top of the stack must also satisfy has_head(). `label` is unused.
+        if NON_MONOTONIC:
+            return s.stack_len >= 2 #and not missing_brackets(s)
+        else:
+            return s.stack_len >= 2 and has_head(get_s0(s))
+
+    @staticmethod
+    cdef int transition(State* state, int label) except -1:
+        # Non-monotonic case: a still-headless S0 gets an arc before it is
+        # popped, using the label previously stored in its .dep field.
+        # NOTE(review): add_dep argument order looks like (state, head, child,
+        # label), i.e. stack[-1] becomes the head of stack[0] -- confirm.
+        if NON_MONOTONIC and not has_head(get_s0(state)):
+            add_dep(state, state.stack[-1], state.stack[0], get_s0(state).dep)
+        pop_stack(state)
+
+    @staticmethod
+    cdef int cost(const State* s, const GoldParseC* gold, int label) except -1:
+        # 9000 is returned for a move that is not currently valid.
+        if not Reduce.is_valid(s, label):
+            return 9000
+        cdef int cost = 0
+        cost += children_in_buffer(s, s.stack[0], gold.heads)
+        # The head_in_buffer term is only counted in non-monotonic mode.
+        if NON_MONOTONIC:
+            cost += head_in_buffer(s, s.stack[0], gold.heads)
+        return cost
+
+
+cdef class LeftArc:
+    # LEFT move: adds an arc for the top stack entry, then pops it.
+    @staticmethod
+    cdef bint is_valid(const State* s, int label) except -1:
+        # Requires a non-empty stack. When NON_MONOTONIC is off, the top of
+        # the stack must not already satisfy has_head(). `label` is unused.
+        if NON_MONOTONIC:
+            return s.stack_len >= 1 #and not missing_brackets(s)
+        else:
+            return s.stack_len >= 1 and not has_head(get_s0(s))
+
+    @staticmethod
+    cdef int transition(State* state, int label) except -1:
+        # Interpret left-arcs from EOL as attachment to root
+        # (the EOL branch passes stack[0] as both arc endpoints).
+        if at_eol(state):
+            add_dep(state, state.stack[0], state.stack[0], label)
+        else:
+            add_dep(state, state.i, state.stack[0], label)
+        pop_stack(state)
+
+    @staticmethod
+    cdef int cost(const State* s, const GoldParseC* gold, int label) except -1:
+        # 9000 is returned for a move that is not currently valid.
+        if not LeftArc.is_valid(s, label):
+            return 9000
+        cdef int cost = 0
+        # Gold head of S0 is the word at s.i: the arc itself is correct, so
+        # the only possible cost is a label mismatch. label == -1 skips the
+        # label comparison entirely.
+        if gold.heads[s.stack[0]] == s.i:
+            cost += label != -1 and label != gold.labels[s.stack[0]]
+            return cost
+        # If we're at EOL, then the left arc will add an arc to ROOT.
+        elif at_eol(s):
+            # Are we root?
+            if gold.labels[s.stack[0]] != -1:
+                # If we're at EOL, prefer to reduce or break over left-arc
+                if Reduce.is_valid(s, -1) or Break.is_valid(s, -1): 
+                    cost += gold.heads[s.stack[0]] != s.stack[0]
+                    # Are we labelling correctly?
+                    cost += label != -1 and label != gold.labels[s.stack[0]]
+                    return cost
+        # General case (also reached from the EOL branch when it does not
+        # return): count the gold arcs involving S0 reported by the two
+        # *_in_buffer helpers.
+        cost += head_in_buffer(s, s.stack[0], gold.heads)
+        cost += children_in_buffer(s, s.stack[0], gold.heads)
+        # Non-monotonic mode: also charge when the gold head of S0 is the
+        # stack entry directly below it.
+        if NON_MONOTONIC and s.stack_len >= 2:
+            cost += gold.heads[s.stack[0]] == s.stack[-1]
+        # Charge when S0 has a gold label and its gold head is itself.
+        if gold.labels[s.stack[0]] != -1:
+            cost += gold.heads[s.stack[0]] == s.stack[0]
+        return cost
+
+
+cdef class RightArc:
+    # RIGHT move: adds an arc between the top stack entry and the word at
+    # state.i, then calls push_stack().
+    @staticmethod
+    cdef bint is_valid(const State* s, int label) except -1:
+        # Requires a non-empty stack and at_eol(s) false. `label` is unused.
+        return s.stack_len >= 1 and not at_eol(s)
+
+    @staticmethod
+    cdef int transition(State* state, int label) except -1:
+        # NOTE(review): add_dep argument order looks like (state, head, child,
+        # label), i.e. stack[0] becomes the head of the word at state.i --
+        # confirm against add_dep's definition.
+        add_dep(state, state.stack[0], state.i, label)
+        push_stack(state)
+
+    @staticmethod
+    cdef int cost(const State* s, const GoldParseC* gold, int label) except -1:
+        # 9000 is returned for a move that is not currently valid.
+        if not RightArc.is_valid(s, label):
+            return 9000
+        cdef int cost
+        cost = 0
+        # Gold head of the word at s.i is S0: the arc is correct, so only a
+        # label mismatch can cost anything (label == -1 skips the check).
+        if gold.heads[s.i] == s.stack[0]:
+            cost += label != -1 and label != gold.labels[s.i]
+            return cost
+        # This indicates missing head
+        if gold.labels[s.i] != -1:
+            cost += head_in_buffer(s, s.i, gold.heads)
+        cost += children_in_stack(s, s.i, gold.heads)
+        cost += head_in_stack(s, s.i, gold.heads)
+        return cost
+
+
+cdef class Break:
+    # BREAK move: marks a sentence boundary before state.i and empties the
+    # stack.
+    @staticmethod
+    cdef bint is_valid(const State* s, int label) except -1:
+        # Disallowed when USE_BREAK is off or at_eol(s) is true; otherwise
+        # allowed unless more than one stack entry has head == 0.
+        # `label` is unused.
+        cdef int i
+        # Declared explicitly so the flag is a C bint rather than an
+        # inferred/object-typed local.
+        cdef bint seen_headless = False
+        if not USE_BREAK:
+            return False
+        elif at_eol(s):
+            return False
+        #elif NON_MONOTONIC:
+        #    return True
+        else:
+            # In the Break transition paper, they have this constraint that prevents
+            # Break if stack is disconnected. But, if we're doing non-monotonic parsing,
+            # we prefer to relax this constraint. This is helpful in parsing whole
+            # documents, because then we don't get stuck with words on the stack.
+            # NOTE(review): the constraint is still enforced below regardless
+            # of NON_MONOTONIC (the relaxing branch above is commented out).
+            for i in range(s.stack_len):
+                # s.stack[-i] walks from the top of the stack downwards.
+                if s.sent[s.stack[-i]].head == 0:
+                    if seen_headless:
+                        return False
+                    else:
+                        seen_headless = True
+            # TODO: Constituency constraints
+            return True
+
+    @staticmethod
+    cdef int transition(State* state, int label) except -1:
+        # NOTE(review): indexes state.i-1; is_valid() does not check
+        # state.i > 0 -- confirm Break is never applied at i == 0.
+        state.sent[state.i-1].sent_end = True
+        # Empty the stack, writing `label` into the .dep of every entry whose
+        # head field is still 0.
+        while state.stack_len != 0:
+            if get_s0(state).head == 0:
+                get_s0(state).dep = label
+            state.stack -= 1
+            state.stack_len -= 1
+        if not at_eol(state):
+            push_stack(state)
+
+    @staticmethod
+    cdef int cost(const State* s, const GoldParseC* gold, int label) except -1:
+        # 9000 is returned for a move that is not currently valid.
+        if not Break.is_valid(s, label):
+            return 9000
+        # When we break, we Reduce all of the words on the stack.
+        cdef int cost = 0
+        # Typed loop index, consistent with is_valid() above.
+        cdef int i
+        # Number of deps between S0...Sn and N0...Nn
+        for i in range(s.i, s.sent_len):
+            cost += children_in_stack(s, i, gold.heads)
+            cost += head_in_stack(s, i, gold.heads)
+        return cost
+
+
+cdef class Constituent:
+    # CONSTITUENT move. Currently disabled: is_valid() returns False on every
+    # path, so cost() always returns 9000 and its `raise` is never reached.
+    # The commented-out code is the unfinished bracket-building draft.
+    @staticmethod
+    cdef bint is_valid(const State* s, int label) except -1:
+        # Both branches return False; the stack_len check only matters for
+        # the commented-out draft below.
+        if s.stack_len < 1:
+            return False
+        return False
+    #else:
+    #    # If all stack elements are popped, can't constituent
+    #    for i in range(s.ctnts.stack_len):
+    #        if not s.ctnts.is_popped[-i]:
+    #            return True
+    #    else:
+    #        return False
+
+    @staticmethod
+    cdef int transition(State* state, int label) except -1:
+        # No-op placeholder: returns False (0) without touching the state.
+        return False
+        #cdef Constituent* bracket = new_bracket(state.ctnts)
+
+        #bracket.parent = NULL
+        #bracket.label = self.label
+        #bracket.head = get_s0(state)
+        #bracket.length = 0
+
+        #attach(bracket, state.ctnts.stack)
+        # Attach rightward children. They're in the brackets array somewhere
+        # between here and B0.
+        #cdef Constituent* node
+        #cdef const TokenC* node_gov
+        #for i in range(1, bracket - state.ctnts.stack):
+        #    node = bracket - i
+        #    node_gov = node.head + node.head.head
+        #    if node_gov == bracket.head:
+        #        attach(bracket, node)
+
+    @staticmethod
+    cdef int cost(const State* s, const GoldParseC* gold, int label) except -1:
+        # 9000 is returned for a move that is not currently valid, which is
+        # always the case while is_valid() is hard-wired to False.
+        if not Constituent.is_valid(s, label):
+            return 9000
+        # Guard in case is_valid() is re-enabled before cost() is written.
+        raise Exception("Constituent move should be disabled currently")
+        # The gold standard is indexed by end, then by start, then a set of labels
+        #brackets = gold.brackets(get_s0(s).r_edge, {})
+        #if not brackets:
+        #    return 2 # 2 loss for bad bracket, only 1 for good bracket bad label
+        # Index the current brackets in the state
+        #existing = set()
+        #for i in range(s.ctnt_len):
+        #    if ctnt.end == s.r_edge and ctnt.label == self.label:
+        #        existing.add(ctnt.start)
+        #cdef int loss = 2
+        #cdef const TokenC* child
+        #cdef const TokenC* s0 = get_s0(s)
+        #cdef int n_left = count_left_kids(s0)
+        # Iterate over the possible start positions, and check whether we have a
+        # (start, end, label) match to the gold tree
+        #for i in range(1, n_left):
+        #    child = get_left(s, s0, i)
+        #    if child.l_edge in brackets and child.l_edge not in existing:
+        #        if self.label in brackets[child.l_edge]
+        #            return 0
+        #        else:
+        #            loss = 1 # If we see the start position, set loss to 1
+        #return loss
+
+
+cdef class Adjust:
+    # ADJUST move. Currently disabled: is_valid() always returns False, so
+    # cost() always returns 9000 and its `raise` is never reached. The
+    # commented-out code is the unfinished bracket-adjustment draft.
+    @staticmethod
+    cdef bint is_valid(const State* s, int label) except -1:
+        # Hard-wired to False; neither argument is consulted.
+        return False
+        #if s.ctnts.stack_len < 2:
+        #    return False
+
+        #cdef const Constituent* b1 = s.ctnts.stack[-1]
+        #cdef const Constituent* b0 = s.ctnts.stack[0]
+
+        #if (b1.head + b1.head.head) != b0.head:
+        #    return False
+        #elif b0.head >= b1.head:
+        #    return False
+        #elif b0 >= b1:
+        #    return False
+
+    @staticmethod
+    cdef int transition(State* state, int label) except -1:
+        # No-op placeholder: returns False (0) without touching the state.
+        return False
+        #cdef Constituent* b0 = state.ctnts.stack[0]
+        #cdef Constituent* b1 = state.ctnts.stack[1]
+
+        #assert (b1.head + b1.head.head) == b0.head
+        #assert b0.head < b1.head
+        #assert b0 < b1
+
+        #attach(b0, b1)
+        ## Pop B1 from stack, but keep B0 on top
+        #state.ctnts.stack -= 1
+        #state.ctnts.stack[0] = b0
+
+    @staticmethod
+    cdef int cost(const State* s, const GoldParseC* gold, int label) except -1:
+        # 9000 is returned for a move that is not currently valid, which is
+        # always the case while is_valid() is hard-wired to False.
+        if not Adjust.is_valid(s, label):
+            return 9000
+        # Guard in case is_valid() is re-enabled before cost() is written.
+        raise Exception("Adjust move should be disabled currently")
+        # The gold standard is indexed by end, then by start, then a set of labels
+        #gold_starts = gold.brackets(get_s0(s).r_edge, {})
+        # Case 1: There are 0 brackets ending at this word.
+        # --> Cost is sunk, but must allow brackets to begin
+        #if not gold_starts:
+        #    return 0
+        # Is the top bracket correct?
+        #gold_labels = gold_starts.get(s.ctnt.start, set())
+        # TODO: Case where we have a unary rule
+        # TODO: Case where two brackets end on this word, with top bracket starting
+        # before
+
+        #cdef const TokenC* child
+        #cdef const TokenC* s0 = get_s0(s)
+        #cdef int n_left = count_left_kids(s0)
+        #cdef int i
+        # Iterate over the possible start positions, and check whether we have a
+        # (start, end, label) match to the gold tree
+        #for i in range(1, n_left):
+        #    child = get_left(s, s0, i)
+        #    if child.l_edge in brackets:
+        #        if self.label in brackets[child.l_edge]:
+        #            return 0
+        #        else:
+        #            loss = 1 # If we see the start position, set loss to 1
+        #return loss
+
+
 cdef class ArcEager(TransitionSystem):
    @classmethod
    def get_labels(cls, gold_parses):
@ -106,26 +406,33 @@ cdef class ArcEager(TransitionSystem):
        t.move = move
        t.label = label
        if move == SHIFT:
-            t.do = _do_shift
-            t.get_cost = _shift_cost
+            t.is_valid = Shift.is_valid
+            t.do = Shift.transition
+            t.get_cost = Shift.cost
        elif move == REDUCE:
-            t.do = _do_reduce
-            t.get_cost = _reduce_cost
+            t.is_valid = Reduce.is_valid
+            t.do = Reduce.transition
+            t.get_cost = Reduce.cost
        elif move == LEFT:
-            t.do = _do_left
-            t.get_cost = _left_cost
+            t.is_valid = LeftArc.is_valid
+            t.do = LeftArc.transition
+            t.get_cost = LeftArc.cost
        elif move == RIGHT:
-            t.do = _do_right
-            t.get_cost = _right_cost
+            t.is_valid = RightArc.is_valid
+            t.do = RightArc.transition
+            t.get_cost = RightArc.cost
        elif move == BREAK:
-            t.do = _do_break
-            t.get_cost = _break_cost
+            t.is_valid = Break.is_valid
+            t.do = Break.transition
+            t.get_cost = Break.cost
        elif move == CONSTITUENT:
-            t.do = _do_constituent
-            t.get_cost = _constituent_cost
+            t.is_valid = Constituent.is_valid
+            t.do = Constituent.transition
+            t.get_cost = Constituent.cost
        elif move == ADJUST:
-            t.do = _do_adjust
-            t.get_cost = _adjust_cost
+            t.is_valid = Adjust.is_valid
+            t.do = Adjust.transition
+            t.get_cost = Adjust.cost
        else:
            raise Exception(move)
        return t
@ -139,15 +446,15 @@ cdef class ArcEager(TransitionSystem):
            if state.sent[i].head == 0 and state.sent[i].dep == 0:
                state.sent[i].dep = root_label

-    cdef int set_valid(self, bint* output, const State* s) except -1:
+    cdef int set_valid(self, bint* output, const State* state) except -1:
        cdef bint[N_MOVES] is_valid
-        is_valid[SHIFT] = _can_shift(s)
-        is_valid[REDUCE] = _can_reduce(s)
-        is_valid[LEFT] = _can_left(s)
-        is_valid[RIGHT] = _can_right(s)
-        is_valid[BREAK] = _can_break(s)
-        is_valid[CONSTITUENT] = _can_constituent(s)
-        is_valid[ADJUST] = _can_adjust(s)
+        is_valid[SHIFT] = Shift.is_valid(state, -1)
+        is_valid[REDUCE] = Reduce.is_valid(state, -1)
+        is_valid[LEFT] = LeftArc.is_valid(state, -1)
+        is_valid[RIGHT] = RightArc.is_valid(state, -1)
+        is_valid[BREAK] = Break.is_valid(state, -1)
+        is_valid[CONSTITUENT] = Constituent.is_valid(state, -1)
+        is_valid[ADJUST] = Adjust.is_valid(state, -1)
        cdef int i
        for i in range(self.n_moves):
            output[i] = is_valid[self.c[i].move]
@ -156,14 +463,14 @@ cdef class ArcEager(TransitionSystem):
        cdef Transition move
        move.label = -1
        cdef int[N_MOVES] move_costs
-        move_costs[SHIFT] = _shift_cost(&move, s, &gold.c)
-        move_costs[REDUCE] = _reduce_cost(&move, s, &gold.c)
-        move_costs[LEFT] = _left_cost(&move, s, &gold.c)
-        move_costs[RIGHT] = _right_cost(&move, s, &gold.c)
-        move_costs[BREAK] = _break_cost(&move, s, &gold.c)
-        move_costs[CONSTITUENT] = _constituent_cost(&move, s, &gold.c)
-        move_costs[ADJUST] = _adjust_cost(&move, s, &gold.c)
- 
+        move_costs[SHIFT] = Shift.cost(s, &gold.c, -1)
+        move_costs[REDUCE] = Reduce.cost(s, &gold.c, -1)
+        move_costs[LEFT] = LeftArc.cost(s, &gold.c, -1)
+        move_costs[RIGHT] = RightArc.cost(s, &gold.c, -1)
+        move_costs[BREAK] = Break.cost(s, &gold.c, -1)
+        move_costs[CONSTITUENT] = Constituent.cost(s, &gold.c, -1)
+        move_costs[ADJUST] = Adjust.cost(s, &gold.c, -1)
+
        cdef int i, label
        cdef int* labels = gold.c.labels
        cdef int* heads = gold.c.heads
@ -176,19 +483,19 @@ cdef class ArcEager(TransitionSystem):
                    label = labels[s.i]
                if move.move == LEFT and heads[s.stack[0]] == s.i:
                    label = labels[s.stack[0]]
-                elif move.move == LEFT and at_eol(s) and (_can_reduce(s) or _can_break(s)):
+                elif move.move == LEFT and at_eol(s) and (Reduce.is_valid(s, -1) or Break.is_valid(s, 1)):
                    label = labels[s.stack[0]]
                output[i] += move.label != label and label != -1

    cdef Transition best_valid(self, const weight_t* scores, const State* s) except *:
        cdef bint[N_MOVES] is_valid
-        is_valid[SHIFT] = _can_shift(s)
-        is_valid[REDUCE] = _can_reduce(s)
-        is_valid[LEFT] = _can_left(s)
-        is_valid[RIGHT] = _can_right(s)
-        is_valid[BREAK] = _can_break(s)
-        is_valid[CONSTITUENT] = _can_constituent(s)
-        is_valid[ADJUST] = _can_adjust(s)
+        is_valid[SHIFT] = Shift.is_valid(s, -1)
+        is_valid[REDUCE] = Reduce.is_valid(s, -1)
+        is_valid[LEFT] = LeftArc.is_valid(s, -1)
+        is_valid[RIGHT] = RightArc.is_valid(s, -1)
+        is_valid[BREAK] = Break.is_valid(s, -1)
+        is_valid[CONSTITUENT] = Constituent.is_valid(s, -1)
+        is_valid[ADJUST] = Adjust.is_valid(s, -1)
        cdef Transition best
        cdef weight_t score = MIN_SCORE
        cdef int i
@ -209,302 +516,3 @@ cdef class ArcEager(TransitionSystem):
        return best


-cdef class Shift:
-    @staticmethod
-    cdef inline bint is_valid(const State* s) nogil:
-        return not at_eol(s)
-
-    @staticmethod
-    cdef int transition(State* state, int label) except -1:
-        # Set the dep label, in case we need it after we reduce
-        if NON_MONOTONIC:
-            state.sent[state.i].dep = label
-        push_stack(state)
-
-    @staticmethod
-    cdef int cost(const State* s, GoldParseC* gold, int label) except -1:
-        if not _can_shift(s):
-            return 9000
-        cost = 0
-        cost += head_in_stack(s, s.i, gold.heads)
-        cost += children_in_stack(s, s.i, gold.heads)
-        # If we can break, and there's no cost to doing so, we should
-        if _can_break(s) and _break_cost(self, s, gold) == 0:
-            cost += 1
-        return cost
-
-
-cdef class Reduce:
-    @staticmethod
-    cdef inline bint is_valid(const State* s) nogil:
-        if NON_MONOTONIC:
-            return s.stack_len >= 2 #and not missing_brackets(s)
-        else:
-            return s.stack_len >= 2 and has_head(get_s0(s))
-
-    @staticmethod
-    cdef int transition(State* state, int label) except -1:
-        if NON_MONOTONIC and not has_head(get_s0(state)):
-            add_dep(state, state.stack[-1], state.stack[0], get_s0(state).dep)
-        pop_stack(state)
-
-    @staticmethod
-    cdef int cost(const State* s, GoldParseC* gold, int label) except -1:
-        if not Reduce.is_valid(s):
-            return 9000
-        cdef int cost = 0
-        cost += children_in_buffer(s, s.stack[0], gold.heads)
-        if NON_MONOTONIC:
-            cost += head_in_buffer(s, s.stack[0], gold.heads)
-        return cost
-
-
-cdef class LeftArc:
-    @staticmethod
-    cdef inline bint is_valid(const State* s) nogil:
-        if NON_MONOTONIC:
-            return s.stack_len >= 1 #and not missing_brackets(s)
-        else:
-            return s.stack_len >= 1 and not has_head(get_s0(s))
-
-    @staticmethod
-    cdef int transition(State* state, int label) except -1:
-        # Interpret left-arcs from EOL as attachment to root
-        if at_eol(state):
-            add_dep(state, state.stack[0], state.stack[0], label)
-        else:
-            add_dep(state, state.i, state.stack[0], label)
-        pop_stack(state)
-
-    @staticmethod
-    cdef int cost(const State* s, GoldParseC* gold, int label) except -1:
-        if not _can_left(s):
-            return 9000
-        cost = 0
-        if gold.heads[s.stack[0]] == s.i:
-            cost += self.label != -1 and self.label != gold.labels[s.stack[0]]
-            return cost
-        # If we're at EOL, then the left arc will add an arc to ROOT.
-        elif at_eol(s):
-            # Are we root?
-            if gold.labels[s.stack[0]] != -1:
-                # If we're at EOL, prefer to reduce or break over left-arc
-                if _can_reduce(s) or _can_break(s): 
-                    cost += gold.heads[s.stack[0]] != s.stack[0]
-                    # Are we labelling correctly?
-                    cost += label != -1 and label != gold.labels[s.stack[0]]
-                    return cost
-        cost += head_in_buffer(s, s.stack[0], gold.heads)
-        cost += children_in_buffer(s, s.stack[0], gold.heads)
-        if NON_MONOTONIC and s.stack_len >= 2:
-            cost += gold.heads[s.stack[0]] == s.stack[-1]
-        if gold.labels[s.stack[0]] != -1:
-            cost += gold.heads[s.stack[0]] == s.stack[0]
-        return cost
-
-
-cdef class RightArc:
-    @staticmethod
-    cdef inline bint is_valid(const State* s) nogil:
-        return s.stack_len >= 1 and not at_eol(s)
-
-    @staticmethod
-    cdef int transition(State* state, int label) except -1:
-        add_dep(state, state.stack[0], state.i, label)
-        push_stack(state)
-
-    @staticmethod
-    cdef int cost(const State* s, GoldParseC* gold, int label) except -1:
-        if not RightArc.is_valid(s):
-            return 9000
-        cost = 0
-        if gold.heads[s.i] == s.stack[0]:
-            cost += label != -1 and self.label != gold.labels[s.i]
-            return cost
-        # This indicates missing head
-        if gold.labels[s.i] != -1:
-            cost += head_in_buffer(s, s.i, gold.heads)
-        cost += children_in_stack(s, s.i, gold.heads)
-        cost += head_in_stack(s, s.i, gold.heads)
-        return cost
-
-
-cdef class Break:
-    @staticmethod
-    cdef inline bint is_valid(const State* s) nogil:
-        cdef int i
-        if not USE_BREAK:
-            return False
-        elif at_eol(s):
-            return False
-        #elif NON_MONOTONIC:
-        #    return True
-        else:
-            # In the Break transition paper, they have this constraint that prevents
-            # Break if stack is disconnected. But, if we're doing non-monotonic parsing,
-            # we prefer to relax this constraint. This is helpful in parsing whole
-            # documents, because then we don't get stuck with words on the stack.
-            seen_headless = False
-            for i in range(s.stack_len):
-                if s.sent[s.stack[-i]].head == 0:
-                    if seen_headless:
-                        return False
-                    else:
-                        seen_headless = True
-            # TODO: Constituency constraints
-            return True
-
-    @staticmethod
-    cdef int transition(State* state, int label) except -1:
-        state.sent[state.i-1].sent_end = True
-        while state.stack_len != 0:
-            if get_s0(state).head == 0:
-                get_s0(state).dep = label
-            state.stack -= 1
-            state.stack_len -= 1
-        if not at_eol(state):
-            push_stack(state)
-
-    @staticmethod
-    cdef int cost(const State* s, GoldParseC* gold, int label) except -1:
-        if not Break.is_valid(s):
-            return 9000
-        # When we break, we Reduce all of the words on the stack.
-        cdef int cost = 0
-        # Number of deps between S0...Sn and N0...Nn
-        for i in range(s.i, s.sent_len):
-            cost += children_in_stack(s, i, gold.heads)
-            cost += head_in_stack(s, i, gold.heads)
-        return cost
-
-
-cdef class Constituent:
-    @staticmethod
-    cdef inline bint is_valid(const State* s) nogil:
-        if s.stack_len < 1:
-            return False
-        return False
-    #else:
-    #    # If all stack elements are popped, can't constituent
-    #    for i in range(s.ctnts.stack_len):
-    #        if not s.ctnts.is_popped[-i]:
-    #            return True
-    #    else:
-    #        return False
-
-    @staticmethod
-    cdef int transition(State* state, int label) except -1:
-        return False
-        #cdef Constituent* bracket = new_bracket(state.ctnts)
-
-        #bracket.parent = NULL
-        #bracket.label = self.label
-        #bracket.head = get_s0(state)
-        #bracket.length = 0
-
-        #attach(bracket, state.ctnts.stack)
-        # Attach rightward children. They're in the brackets array somewhere
-        # between here and B0.
-        #cdef Constituent* node
-        #cdef const TokenC* node_gov
-        #for i in range(1, bracket - state.ctnts.stack):
-        #    node = bracket - i
-        #    node_gov = node.head + node.head.head
-        #    if node_gov == bracket.head:
-        #        attach(bracket, node)
-
-    @staticmethod
-    cdef int cost(const State* s, GoldParseC* gold, int label) except -1:
-        if not Constituent.is_valid(s):
-            return 9000
-        raise Exception("Constituent move should be disabled currently")
-        # The gold standard is indexed by end, then by start, then a set of labels
-        #brackets = gold.brackets(get_s0(s).r_edge, {})
-        #if not brackets:
-        #    return 2 # 2 loss for bad bracket, only 1 for good bracket bad label
-        # Index the current brackets in the state
-        #existing = set()
-        #for i in range(s.ctnt_len):
-        #    if ctnt.end == s.r_edge and ctnt.label == self.label:
-        #        existing.add(ctnt.start)
-        #cdef int loss = 2
-        #cdef const TokenC* child
-        #cdef const TokenC* s0 = get_s0(s)
-        #cdef int n_left = count_left_kids(s0)
-        # Iterate over the possible start positions, and check whether we have a
-        # (start, end, label) match to the gold tree
-        #for i in range(1, n_left):
-        #    child = get_left(s, s0, i)
-        #    if child.l_edge in brackets and child.l_edge not in existing:
-        #        if self.label in brackets[child.l_edge]
-        #            return 0
-        #        else:
-        #            loss = 1 # If we see the start position, set loss to 1
-        #return loss
-
-
-cdef class Adjust:
-    @staticmethod
-    cdef inline bint is_valid(const State* s) nogil:
-        return False
-        #if s.ctnts.stack_len < 2:
-        #    return False
-
-        #cdef const Constituent* b1 = s.ctnts.stack[-1]
-        #cdef const Constituent* b0 = s.ctnts.stack[0]
-
-        #if (b1.head + b1.head.head) != b0.head:
-        #    return False
-        #elif b0.head >= b1.head:
-        #    return False
-        #elif b0 >= b1:
-        #    return False
-
-    @staticmethod
-    cdef int transition(State* state) except -1:
-        return False
-        #cdef Constituent* b0 = state.ctnts.stack[0]
-        #cdef Constituent* b1 = state.ctnts.stack[1]
-
-        #assert (b1.head + b1.head.head) == b0.head
-        #assert b0.head < b1.head
-        #assert b0 < b1
-
-        #attach(b0, b1)
-        ## Pop B1 from stack, but keep B0 on top
-        #state.ctnts.stack -= 1
-        #state.ctnts.stack[0] = b0
-
-    @staticmethod
-    cdef int cost(const State* s, GoldParseC* gold, int label) except -1:
-        if not Adjust.is_valid(s):
-            return 9000
-        raise Exception("Adjust move should be disabled currently")
-        # The gold standard is indexed by end, then by start, then a set of labels
-        #gold_starts = gold.brackets(get_s0(s).r_edge, {})
-        # Case 1: There are 0 brackets ending at this word.
-        # --> Cost is sunk, but must allow brackets to begin
-        #if not gold_starts:
-        #    return 0
-        # Is the top bracket correct?
-        #gold_labels = gold_starts.get(s.ctnt.start, set())
-        # TODO: Case where we have a unary rule
-        # TODO: Case where two brackets end on this word, with top bracket starting
-        # before
-
-        #cdef const TokenC* child
-        #cdef const TokenC* s0 = get_s0(s)
-        #cdef int n_left = count_left_kids(s0)
-        #cdef int i
-        # Iterate over the possible start positions, and check whether we have a
-        # (start, end, label) match to the gold tree
-        #for i in range(1, n_left):
-        #    child = get_left(s, s0, i)
-        #    if child.l_edge in brackets:
-        #        if self.label in brackets[child.l_edge]:
-        #            return 0
-        #        else:
-        #            loss = 1 # If we see the start position, set loss to 1
-        #return loss
-
-
--- a/spacy/syntax/ner.pyx
+++ b/spacy/syntax/ner.pyx
@ -50,24 +50,6 @@ cdef bint _entity_is_sunk(const State *s, Transition* golds) except -1:
    else:
        return False

-
-cdef int _is_valid(int act, int label, const State* s) except -1:
-    if act == MISSING:
-        return False
-    elif act == BEGIN:
-        return label != 0 and not entity_is_open(s)
-    elif act == IN:
-        return entity_is_open(s) and label != 0 and s.ent.label == label
-    elif act == LAST:
-        return entity_is_open(s) and label != 0 and s.ent.label == label
-    elif act == UNIT:
-        return label != 0 and not entity_is_open(s)
-    elif act == OUT:
-        return not entity_is_open(s)
-    else:
-        raise UnknownMove(act, label)
-
-
 cdef class BiluoPushDown(TransitionSystem):
    @classmethod
    def get_labels(cls, gold_tuples):
@ -122,8 +104,32 @@ cdef class BiluoPushDown(TransitionSystem):
        t.clas = clas
        t.move = move
        t.label = label
-        t.do = do_funcs[move]
-        t.get_cost = _get_cost
+        if move == MISSING:
+            t.is_valid = Missing.is_valid
+            t.do = Missing.transition
+            t.get_cost = Missing.cost
+        elif move == BEGIN:
+            t.is_valid = Begin.is_valid
+            t.do = Begin.transition
+            t.get_cost = Begin.cost
+        elif move == IN:
+            t.is_valid = In.is_valid
+            t.do = In.transition
+            t.get_cost = In.cost
+        elif move == LAST:
+            t.is_valid = Last.is_valid
+            t.do = Last.transition
+            t.get_cost = Last.cost
+        elif move == UNIT:
+            t.is_valid = Unit.is_valid
+            t.do = Unit.transition
+            t.get_cost = Unit.cost
+        elif move == OUT:
+            t.is_valid = Out.is_valid
+            t.do = Out.transition
+            t.get_cost = Out.cost
+        else:
+            raise Exception(move)
        return t

    cdef Transition best_valid(self, const weight_t* scores, const State* s) except *:
@ -133,7 +139,7 @@ cdef class BiluoPushDown(TransitionSystem):
        cdef int i
        for i in range(self.n_moves):
            m = &self.c[i]
-            if _is_valid(m.move, m.label, s) and scores[i] > score:
+            if m.is_valid(s, m.label) and scores[i] > score:
                best = i
                score = scores[i]
        assert best >= 0
@ -145,138 +151,185 @@ cdef class BiluoPushDown(TransitionSystem):
        cdef int i
        for i in range(self.n_moves):
            m = &self.c[i]
-            output[i] = _is_valid(m.move, m.label, s)
+            output[i] = m.is_valid(s, m.label)


-cdef int _get_cost(const Transition* self, const State* s, GoldParseC* gold) except -1:
-    if not _is_valid(self.move, self.label, s):
+cdef class Missing:
+    # Placeholder for the MISSING move. It is never valid, cannot be applied,
+    # and always reports 9000 -- the same cost the arc-eager moves return
+    # when they are invalid -- so it is never selected as a gold move.
+    @staticmethod
+    cdef bint is_valid(const State* s, int label) except -1:
+        return False
+
+    @staticmethod
+    cdef int transition(State* s, int label) except -1:
+        # Unreachable as long as callers respect is_valid().
+        raise NotImplementedError
+
+    @staticmethod
+    cdef int cost(const State* s, const GoldParseC* gold, int label) except -1:
         return 9000
-    cdef bint is_sunk = _entity_is_sunk(s, gold.ner)
-    cdef int next_act = gold.ner[s.i+1].move if s.i < s.sent_len else OUT
-    cdef bint is_gold = _is_gold(self.move, self.label, gold.ner[s.i].move,
-                                 gold.ner[s.i].label, next_act, is_sunk)
-    return not is_gold


-cdef bint _is_gold(int act, int tag, int g_act, int g_tag,
-                   int next_act, bint is_sunk):
-    if g_act == MISSING:
-        return True
-    if act == BEGIN:
+cdef class Begin:
+    # B move: opens a new entity at the current token.
+    @staticmethod
+    cdef bint is_valid(const State* s, int label) except -1:
+        # Needs a real (non-zero) label, and no entity may already be open.
+        return label != 0 and not entity_is_open(s)
+
+    @staticmethod
+    cdef int transition(State* s, int label) except -1:
+        # Move on to a fresh entity slot and record where it starts.
+        s.ent += 1
+        s.ents_len += 1
+        s.ent.start = s.i
+        s.ent.label = label
+        # The end offset is left at 0 here; Last.transition fills it in.
+        s.ent.end = 0
+        s.sent[s.i].ent_iob = 3
+        s.sent[s.i].ent_type = label
+        s.i += 1
+
+    @staticmethod
+    cdef int cost(const State* s, const GoldParseC* gold, int label) except -1:
+        # Returns 0 if the move agrees with the gold annotation, 1 if it does
+        # not, and 9000 if the move is not valid in this state.
+        # The "True"/"False" in the comments below date from the old _is_gold
+        # predicate: True corresponds to cost 0, False to cost 1.
+        #
+        # Keep the validity guard the pre-refactor _get_cost applied (and the
+        # arc-eager cost functions still apply), so an invalid move can never
+        # be picked up as a zero-cost move by best_gold().
+        if not Begin.is_valid(s, label):
+            return 9000
+        cdef int g_act = gold.ner[s.i].move
+        cdef int g_tag = gold.ner[s.i].label
+        # No gold annotation for this token: any valid move counts as gold,
+        # as in the pre-refactor _is_gold.
+        if g_act == MISSING:
+            return 0
         if g_act == BEGIN:
             # B, Gold B --> Label match
-            return tag == g_tag
+            return label != g_tag
         else:
             # B, Gold I --> False (P)
             # B, Gold L --> False (P)
             # B, Gold O --> False (P)
             # B, Gold U --> False (P)
-            return False
-    elif act == IN:
+            return 1
+
+cdef class In:
+    # I move: extends the currently open entity over the current token.
+    @staticmethod
+    cdef bint is_valid(const State* s, int label) except -1:
+        # Only valid inside an open entity, and only with that entity's label.
+        return entity_is_open(s) and label != 0 and s.ent.label == label
+
+    @staticmethod
+    cdef int transition(State* s, int label) except -1:
+        s.sent[s.i].ent_iob = 1
+        s.sent[s.i].ent_type = label
+        s.i += 1
+
+    @staticmethod
+    cdef int cost(const State* s, const GoldParseC* gold, int label) except -1:
+        # Returns 0 if the move agrees with the gold annotation, 1 if it does
+        # not, and 9000 if the move is not valid in this state.
+        # The "True"/"False" in the comments below date from the old _is_gold
+        # predicate: True corresponds to cost 0, False to cost 1.
+        #
+        # Keep the validity guard the pre-refactor _get_cost applied (and the
+        # arc-eager cost functions still apply), so an invalid move can never
+        # be picked up as a zero-cost move by best_gold().
+        if not In.is_valid(s, label):
+            return 9000
+        # NOTE(review): this reads gold.ner[s.i+1] whenever s.i < s.sent_len,
+        # i.e. also for the last token. Presumably gold.ner is padded past the
+        # sentence end -- confirm, otherwise the test should be on s.i+1.
+        cdef int next_act = gold.ner[s.i+1].move if s.i < s.sent_len else OUT
+        cdef int g_act = gold.ner[s.i].move
+        cdef bint is_sunk = _entity_is_sunk(s, gold.ner)
+
+        # No gold annotation for this token: any valid move counts as gold,
+        # as in the pre-refactor _is_gold.
+        if g_act == MISSING:
+            return 0
         if g_act == BEGIN:
             # I, Gold B --> True (P of bad open entity sunk, R of this entity sunk)
-            return True
+            return 0
         elif g_act == IN:
             # I, Gold I --> True (label forced by prev, if mismatch, P and R both sunk)
-            return True
+            return 0
         elif g_act == LAST:
             # I, Gold L --> True iff this entity sunk and next tag == O
-            return is_sunk and (next_act == OUT or next_act == MISSING)
+            return not (is_sunk and (next_act == OUT or next_act == MISSING))
         elif g_act == OUT:
             # I, Gold O --> True iff next tag == O
-            return next_act == OUT or next_act == MISSING
+            return not (next_act == OUT or next_act == MISSING)
         elif g_act == UNIT:
             # I, Gold U --> True iff next tag == O
-            return next_act == OUT
-    elif act == LAST:
+            return next_act != OUT
+
+
+
+cdef class Last:
+    # L move: closes the currently open entity on the current token.
+    @staticmethod
+    cdef bint is_valid(const State* s, int label) except -1:
+        # Only valid inside an open entity, and only with that entity's label.
+        return entity_is_open(s) and label != 0 and s.ent.label == label
+
+    @staticmethod
+    cdef int transition(State* s, int label) except -1:
+        # The entity's end offset is exclusive: one past the current token.
+        s.ent.end = s.i+1
+        s.sent[s.i].ent_iob = 1
+        s.sent[s.i].ent_type = label
+        s.i += 1
+
+    @staticmethod
+    cdef int cost(const State* s, const GoldParseC* gold, int label) except -1:
+        # Returns 0 if the move agrees with the gold annotation, 1 if it does
+        # not, and 9000 if the move is not valid in this state.
+        # The "True"/"False" in the comments below date from the old _is_gold
+        # predicate: True corresponds to cost 0, False to cost 1.
+        #
+        # Keep the validity guard the pre-refactor _get_cost applied (and the
+        # arc-eager cost functions still apply), so an invalid move can never
+        # be picked up as a zero-cost move by best_gold().
+        if not Last.is_valid(s, label):
+            return 9000
+        cdef int g_act = gold.ner[s.i].move
+
+        # No gold annotation for this token: any valid move counts as gold,
+        # as in the pre-refactor _is_gold.
+        if g_act == MISSING:
+            return 0
         if g_act == BEGIN:
             # L, Gold B --> True
-            return True
+            return 0
         elif g_act == IN:
             # L, Gold I --> True iff this entity sunk
-            return is_sunk
+            return not _entity_is_sunk(s, gold.ner)
         elif g_act == LAST:
             # L, Gold L --> True
-            return True
+            return 0
         elif g_act == OUT:
             # L, Gold O --> True
-            return True
+            return 0
         elif g_act == UNIT:
             # L, Gold U --> True
-            return True
-    elif act == OUT:
-        if g_act == BEGIN:
-            # O, Gold B --> False
-            return False
-        elif g_act == IN:
-            # O, Gold I --> True
-            return True
-        elif g_act == LAST:
-            # O, Gold L --> True
-            return True
-        elif g_act == OUT:
-            # O, Gold O --> True
-            return True
-        elif g_act == UNIT:
-            # O, Gold U --> False
-            return False
-    elif act == UNIT:
+            return 0
+
+
+cdef class Unit:
+    # U move: records a single-token entity on the current token.
+    @staticmethod
+    cdef bint is_valid(const State* s, int label) except -1:
+        # Needs a real (non-zero) label, and no entity may already be open.
+        return label != 0 and not entity_is_open(s)
+
+    @staticmethod
+    cdef int transition(State* s, int label) except -1:
+        # Move on to a fresh entity slot; it starts and ends on this token
+        # (the end offset is exclusive).
+        s.ent += 1
+        s.ents_len += 1
+        s.ent.start = s.i
+        s.ent.label = label
+        s.ent.end = s.i+1
+        s.sent[s.i].ent_iob = 3
+        s.sent[s.i].ent_type = label
+        s.i += 1
+
+    @staticmethod
+    cdef int cost(const State* s, const GoldParseC* gold, int label) except -1:
+        # Returns 0 if the move agrees with the gold annotation, 1 if it does
+        # not, and 9000 if the move is not valid in this state.
+        # The "True"/"False" in the comments below date from the old _is_gold
+        # predicate: True corresponds to cost 0, False to cost 1.
+        #
+        # Keep the validity guard the pre-refactor _get_cost applied (and the
+        # arc-eager cost functions still apply), so an invalid move can never
+        # be picked up as a zero-cost move by best_gold().
+        if not Unit.is_valid(s, label):
+            return 9000
+        cdef int g_act = gold.ner[s.i].move
+        cdef int g_tag = gold.ner[s.i].label
+
+        # No gold annotation for this token: any valid move counts as gold,
+        # as in the pre-refactor _is_gold.
+        if g_act == MISSING:
+            return 0
         if g_act == UNIT:
             # U, Gold U --> True iff tag match
-            return tag == g_tag
+            return label != g_tag
         else:
             # U, Gold B --> False
             # U, Gold I --> False
             # U, Gold L --> False
             # U, Gold O --> False
-            return False
+            return 1


-cdef int _do_begin(const Transition* self, State* s) except -1:
-    s.ent += 1
-    s.ents_len += 1
-    s.ent.start = s.i
-    s.ent.label = self.label
-    s.ent.end = 0
-    s.sent[s.i].ent_iob = 3
-    s.sent[s.i].ent_type = self.label
-    s.i += 1
+cdef class Out:
+    # O move: marks the current token as outside any entity.
+    @staticmethod
+    cdef bint is_valid(const State* s, int label) except -1:
+        # An open entity has to be closed (Last) before a token can be O.
+        return not entity_is_open(s)
 
+    @staticmethod
+    cdef int transition(State* s, int label) except -1:
+        s.sent[s.i].ent_iob = 2
+        s.i += 1
+
+    @staticmethod
+    cdef int cost(const State* s, const GoldParseC* gold, int label) except -1:
+        # Returns 0 if the move agrees with the gold annotation, 1 if it does
+        # not, and 9000 if the move is not valid in this state.
+        # The "True"/"False" in the comments below date from the old _is_gold
+        # predicate: True corresponds to cost 0, False to cost 1.
+        #
+        # Keep the validity guard the pre-refactor _get_cost applied (and the
+        # arc-eager cost functions still apply), so an invalid move can never
+        # be picked up as a zero-cost move by best_gold().
+        if not Out.is_valid(s, label):
+            return 9000
+        cdef int g_act = gold.ner[s.i].move
 
-cdef int _do_in(const Transition* self, State* s) except -1:
-    s.sent[s.i].ent_iob = 1
-    s.sent[s.i].ent_type = self.label
-    s.i += 1
-
-
-cdef int _do_last(const Transition* self, State* s) except -1:
-    s.ent.end = s.i+1
-    s.sent[s.i].ent_iob = 1
-    s.sent[s.i].ent_type = self.label
-    s.i += 1
-
-
-cdef int _do_unit(const Transition* self, State* s) except -1:
-    s.ent += 1
-    s.ents_len += 1
-    s.ent.start = s.i
-    s.ent.label = self.label
-    s.ent.end = s.i+1
-    s.sent[s.i].ent_iob = 3
-    s.sent[s.i].ent_type = self.label
-    s.i += 1
-
-
-cdef int _do_out(const Transition* self, State* s) except -1:
-    s.sent[s.i].ent_iob = 2
-    s.i += 1
-
-
-do_funcs[BEGIN] = _do_begin
-do_funcs[IN] = _do_in
-do_funcs[LAST] = _do_last
-do_funcs[UNIT] = _do_unit
-do_funcs[OUT] = _do_out
+        # No gold annotation for this token: any valid move counts as gold,
+        # as in the pre-refactor _is_gold.
+        if g_act == MISSING:
+            return 0
+        if g_act == BEGIN:
+            # O, Gold B --> False
+            return 1
+        elif g_act == IN:
+            # O, Gold I --> True
+            return 0
+        elif g_act == LAST:
+            # O, Gold L --> True
+            return 0
+        elif g_act == OUT:
+            # O, Gold O --> True
+            return 0
+        elif g_act == UNIT:
+            # O, Gold U --> False
+            return 1


 class OracleError(Exception):
--- a/spacy/syntax/parser.pyx
+++ b/spacy/syntax/parser.pyx
@ -106,7 +106,7 @@ cdef class Parser:
            fill_context(context, state)
            scores = self.model.score(context)
            guess = self.moves.best_valid(scores, state)
-            guess.do(&guess, state)
+            guess.do(state, guess.label)
        self.moves.finalize_state(state)
        tokens.set_parse(state.sent)

@ -136,9 +136,9 @@ cdef class Parser:
            scores = self.model.score(context)
            guess = self.moves.best_valid(scores, state)
            best = self.moves.best_gold(scores, state, gold)
-            cost = guess.get_cost(&guess, state, &gold.c)
+            cost = guess.get_cost(state, &gold.c, guess.label)
            self.model.update(context, guess.clas, best.clas, cost)
-            guess.do(&guess, state)
+            guess.do(state, guess.label)
            loss += cost
        return loss

@ -180,11 +180,9 @@ cdef class Parser:
                self.moves.set_costs(beam.costs[i], state, gold)
                if follow_gold:
                    for j in range(self.moves.n_moves):
-                        beam.is_valid[i][j] = beam.costs[i][j] == 0
+                        beam.is_valid[i][j] *= beam.costs[i][j] == 0
        beam.advance(_transition_state, <void*>self.moves.c)
        state = <State*>beam.at(0)
-        if state.sent[state.i].sent_end:
-            beam.size = int(beam.size / 2)
        beam.check_done(_check_final_state, NULL)

    def _count_feats(self, dict counts, Tokens tokens, list hist, int inc):
@ -201,7 +199,7 @@ cdef class Parser:
            fill_context(context, state)
            feats = self.model._extractor.get_feats(context, &n_feats)
            count_feats(counts[clas], feats, n_feats, inc)
-            self.moves.c[clas].do(&self.moves.c[clas], state)
+            self.moves.c[clas].do(state, self.moves.c[clas].label)


 # These are passed as callbacks to thinc.search.Beam
@ -211,7 +209,7 @@ cdef int _transition_state(void* _dest, void* _src, class_t clas, void* _moves)
    src = <const State*>_src
    moves = <const Transition*>_moves
    copy_state(dest, src)
-    moves[clas].do(&moves[clas], dest)
+    moves[clas].do(dest, moves[clas].label)


 cdef void* _init_state(Pool mem, int length, void* tokens) except NULL:
--- a/spacy/syntax/transition_system.pxd
+++ b/spacy/syntax/transition_system.pxd
@ -15,14 +15,14 @@ cdef struct Transition:

    weight_t score

-    int (*get_cost)(const Transition* self, const State* state, GoldParseC* gold) except -1
-    int (*do)(const Transition* self, State* state) except -1
+    bint (*is_valid)(const State* state, int label) except -1
+    int (*get_cost)(const State* state, const GoldParseC* gold, int label) except -1
+    int (*do)(State* state, int label) except -1


-ctypedef int (*get_cost_func_t)(const Transition* self, const State* state,
-              GoldParseC* gold) except -1
+ctypedef int (*get_cost_func_t)(const State* state, const GoldParseC* gold, int label) except -1

-ctypedef int (*do_func_t)(const Transition* self, State* state) except -1
+ctypedef int (*do_func_t)(State* state, int label) except -1


 cdef class TransitionSystem:
--- a/spacy/syntax/transition_system.pyx
+++ b/spacy/syntax/transition_system.pyx
@ -51,7 +51,7 @@ cdef class TransitionSystem:
     cdef int set_costs(self, int* output, const State* s, GoldParse gold) except -1:
+        # Fill output[i] with the cost of each of the self.n_moves transitions
+        # in state s, as reported by that transition's get_cost callback.
+        # No validity check is made here: the value is whatever get_cost
+        # returns for the transition's own label.
         cdef int i
         for i in range(self.n_moves):
-            output[i] = self.c[i].get_cost(&self.c[i], s, &gold.c)
+            output[i] = self.c[i].get_cost(s, &gold.c, self.c[i].label)

    cdef Transition best_gold(self, const weight_t* scores, const State* s,
                              GoldParse gold) except *:
@ -59,7 +59,7 @@ cdef class TransitionSystem:
        cdef weight_t score = MIN_SCORE
        cdef int i
        for i in range(self.n_moves):
-            cost = self.c[i].get_cost(&self.c[i], s, &gold.c)
+            cost = self.c[i].get_cost(s, &gold.c, self.c[i].label)
            if scores[i] > score and cost == 0:
                best = self.c[i]
                score = scores[i]