From b34a1325d398b96a8ac870e6c76d7ef0207fc51d Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal@gmail.com>
Date: Sun, 21 Dec 2014 05:42:23 +1100
Subject: [PATCH] * Everything compiling after reorg. About to start testing.

---
 spacy/syntax/_state.pxd    |  26 +++++--
 spacy/syntax/_state.pyx    |  49 ++++++++++----
 spacy/syntax/arc_eager.pxd |  10 ++-
 spacy/syntax/arc_eager.pyx | 135 ++++++++++++++++++++++---------------
 4 files changed, 143 insertions(+), 77 deletions(-)

diff --git a/spacy/syntax/_state.pxd b/spacy/syntax/_state.pxd
index ab8ce3962..d54cd28d6 100644
--- a/spacy/syntax/_state.pxd
+++ b/spacy/syntax/_state.pxd
@@ -20,8 +20,7 @@ cdef int pop_stack(State *s) except -1
 cdef int push_stack(State *s) except -1
 
 
-cdef inline bint has_head(const TokenC* t) nogil:
-    return t.head != 0
+cdef bint has_head(const TokenC* t) nogil
 
 
 cdef inline int get_idx(const State* s, const TokenC* t) nogil:
@@ -71,14 +70,29 @@ cdef inline bint is_final(const State *s) nogil:
     return at_eol(s) # The stack will be attached to root anyway
 
 
-cdef int children_in_buffer(const State *s, const int head, int* gold) except -1
-cdef int head_in_buffer(const State *s, const int child, int* gold) except -1
-cdef int children_in_stack(const State *s, const int head, int* gold) except -1
-cdef int head_in_stack(const State *s, const int child, int* gold) except -1
+cdef int children_in_buffer(const State *s, const int head, const int* gold) except -1
+cdef int head_in_buffer(const State *s, const int child, const int* gold) except -1
+cdef int children_in_stack(const State *s, const int head, const int* gold) except -1
+cdef int head_in_stack(const State *s, const int child, const int* gold) except -1
 
 cdef State* init_state(Pool mem, TokenC* sent, const int sent_length) except NULL
 
 
+cdef int count_left_kids(const TokenC* head) nogil
+
+
+cdef int count_right_kids(const TokenC* head) nogil
+
+
+# From https://en.wikipedia.org/wiki/Hamming_weight
+cdef inline uint32_t _popcount(uint32_t x) nogil:
+    """Return how many bits of x are set; each pass clears the lowest set bit."""
+    cdef int n_set = 0
+    while x:
+        x = x & (x - 1)
+        n_set += 1
+    return n_set
+
 
 cdef inline uint32_t _nth_significant_bit(uint32_t bits, int n) nogil:
     cdef int i
diff --git a/spacy/syntax/_state.pyx b/spacy/syntax/_state.pyx
index 6bdfdea3e..144b9b9b0 100644
--- a/spacy/syntax/_state.pyx
+++ b/spacy/syntax/_state.pyx
@@ -3,24 +3,32 @@ from libc.string cimport memmove
 from cymem.cymem cimport Pool
 
 from ..lexeme cimport EMPTY_LEXEME
+from ..structs cimport TokenC
+
+
+DEF PADDING = 5
+DEF NON_MONOTONIC = True
 
 
 cdef int add_dep(State *s, int head, int child, int label) except -1:
-    s.sent[child].head = head - child
+    cdef int offset = head - child  # heads are stored relative to the child
+    s.sent[child].head = offset
     s.sent[child].dep_tag = label
     # Keep a bit-vector tracking child dependencies.  If a word has a child at
     # offset i from it, set that bit (tracking left and right separately)
     if child > head:
-        s.sent[head].r_kids |= 1 << (-s.sent[child].head)
+        s.sent[head].r_kids |= 1 << (-offset)
     else:
-        s.sent[head].l_kids |= 1 << s.sent[child].head
+        s.sent[head].l_kids |= 1 << offset
 
 
 cdef int pop_stack(State *s) except -1:
     assert s.stack_len >= 1
     s.stack_len -= 1
     s.stack -= 1
-
+    if s.stack_len == 0 and not at_eol(s):  # refill an emptied stack unless at_eol(s)
+        push_stack(s)
+
 
 cdef int push_stack(State *s) except -1:
     assert s.i < s.sent_len
@@ -28,9 +36,14 @@ cdef int push_stack(State *s) except -1:
     s.stack[0] = s.i
     s.stack_len += 1
     s.i += 1
+    if at_eol(s):
+        while s.stack_len != 0:
+            if not has_head(get_s0(s)):
+                get_s0(s).dep_tag = 0
+            pop_stack(s)
 
 
-cdef int children_in_buffer(const State *s, int head, int* gold) except -1:
+cdef int children_in_buffer(const State *s, int head, const int* gold) except -1:
     # Golds holds an array of head offsets --- the head of word i is i - golds[i]
     # Iterate over the tokens of the queue, and check whether their gold head is
     # our target
@@ -42,20 +55,21 @@ cdef int children_in_buffer(const State *s, int head, int* gold) except -1:
     return n
 
 
-cdef int head_in_buffer(const State *s, const int child, int* gold) except -1:
+cdef int head_in_buffer(const State *s, const int child, const int* gold) except -1:
     return gold[child] >= s.i
 
 
-cdef int children_in_stack(const State *s, const int head, int* gold) except -1:
+cdef int children_in_stack(const State *s, const int head, const int* gold) except -1:
     cdef int i
     cdef int n = 0
     for i in range(s.stack_len):
         if gold[s.stack[-i]] == head:
-            n += 1
+            if NON_MONOTONIC or not has_head(&s.sent[s.stack[-i]]):  # test this item, not always S0
+                n += 1
     return n
 
 
-cdef int head_in_stack(const State *s, const int child, int* gold) except -1:
+cdef int head_in_stack(const State *s, const int child, const int* gold) except -1:
     cdef int i
     for i in range(s.stack_len):
         if gold[child] == s.stack[-i]:
@@ -72,7 +86,7 @@ cdef const TokenC* get_left(const State* s, const TokenC* head, const int idx) n
     if child >= s.sent:
         return child
     else:
-        return s.sent - 1
+        return NULL
 
 
 cdef const TokenC* get_right(const State* s, const TokenC* head, const int idx) nogil:
@@ -84,10 +98,20 @@ cdef const TokenC* get_right(const State* s, const TokenC* head, const int idx)
     if child < (s.sent + s.sent_len):
         return child
     else:
-        return s.sent - 1
+        return NULL
 
 
-DEF PADDING = 5
+cdef bint has_head(const TokenC* t) nogil:
+    return t.head != 0  # a zero head offset marks a token with no head assigned
+
+
+cdef int count_left_kids(const TokenC* head) nogil:
+    return _popcount(head.l_kids)  # l_kids is a bit-vector: one set bit per left child
+
+
+cdef int count_right_kids(const TokenC* head) nogil:
+    return _popcount(head.r_kids)  # r_kids is a bit-vector: one set bit per right child
+
 
 
 cdef State* init_state(Pool mem, TokenC* sent, const int sent_length) except NULL:
@@ -102,4 +126,5 @@ cdef State* init_state(Pool mem, TokenC* sent, const int sent_length) except NUL
     s.stack_len = 0
     s.i = 0
     s.sent_len = sent_length
+    push_stack(s)
     return s
diff --git a/spacy/syntax/arc_eager.pxd b/spacy/syntax/arc_eager.pxd
index ee9d7b9a8..da8163e51 100644
--- a/spacy/syntax/arc_eager.pxd
+++ b/spacy/syntax/arc_eager.pxd
@@ -7,8 +7,11 @@ from ._state cimport State
 
 
 cdef struct Transition:
+    int clas  # index of this transition in the TransitionSystem's move table
     int move
     int label
+    int cost  # written onto the guessed transition by best_gold
+    weight_t score  # copied from the scores array where best_valid / best_gold pick by score
 
 
 cdef class TransitionSystem:
@@ -18,7 +21,8 @@ cdef class TransitionSystem:
 
     cdef const Transition* _moves
 
-    cdef Transition best_valid(self, const weight_t* scores, const State* s) except -1
-    cdef Transition best_gold(self, const weight_t* scores, const State* s,
-                              int* gold_heads, int* gold_labels) except -1
+    cdef Transition best_valid(self, const weight_t* scores, const State* s) except *
+    cdef Transition best_gold(self, Transition* guess, const weight_t* scores,
+                              const State* s,
+                              const int* gold_heads, const int* gold_labels) except *
     cdef int transition(self, State *s, const Transition* t) except -1
diff --git a/spacy/syntax/arc_eager.pyx b/spacy/syntax/arc_eager.pyx
index 2883aa403..25790bacd 100644
--- a/spacy/syntax/arc_eager.pyx
+++ b/spacy/syntax/arc_eager.pyx
@@ -5,7 +5,9 @@ from ._state cimport is_final, at_eol, pop_stack, push_stack, add_dep
 from ._state cimport head_in_buffer, children_in_buffer
 from ._state cimport head_in_stack, children_in_stack
 
-from ..tokens cimport TokenC
+from ..structs cimport TokenC
+
+DEF NON_MONOTONIC = True
 
 
 cdef enum:
@@ -25,22 +27,30 @@ cdef inline bint _can_right(const State* s) nogil:
 
 
 cdef inline bint _can_left(const State* s) nogil:
-    return s.stack_len >= 1 and not has_head(get_s0(s))
+    # With NON_MONOTONIC set, a left-arc may overwrite a head S0 already
+    # has, so a non-empty stack is the only precondition; otherwise S0
+    # must still be headless.
+    return s.stack_len >= 1 and (NON_MONOTONIC or not has_head(get_s0(s)))
 
 
 cdef inline bint _can_reduce(const State* s) nogil:
-    return s.stack_len >= 2 and has_head(get_s0(s))
+    # REDUCE needs two stack items.  Monotonic parsing also requires S0 to
+    # have a head already; with NON_MONOTONIC set that check is dropped and
+    # the REDUCE transition attaches S0 itself.
+    return s.stack_len >= 2 and (NON_MONOTONIC or has_head(get_s0(s)))
 
 
-cdef int _shift_cost(const State* s, int* gold) except -1:
+cdef int _shift_cost(const State* s, const int* gold) except -1:
     assert not at_eol(s)
     cost = 0
     cost += head_in_stack(s, s.i, gold)
     cost += children_in_stack(s, s.i, gold)
+    if NON_MONOTONIC and gold[s.stack[0]] == s.i:
+        cost += 1  # S0's gold head is s.i
     return cost
 
 
-cdef int _right_cost(const State* s, int* gold) except -1:
+cdef int _right_cost(const State* s, const int* gold) except -1:
     assert s.stack_len >= 1
     cost = 0
     if gold[s.i] == s.stack[0]:
@@ -48,10 +58,12 @@ cdef int _right_cost(const State* s, int* gold) except -1:
     cost += head_in_buffer(s, s.i, gold)
     cost += children_in_stack(s, s.i, gold)
     cost += head_in_stack(s, s.i, gold)
+    if NON_MONOTONIC:
+        cost += gold[s.stack[0]] == s.i
     return cost
 
 
-cdef int _left_cost(const State* s, int* gold) except -1:
+cdef int _left_cost(const State* s, const int* gold) except -1:
     assert s.stack_len >= 1
     cost = 0
     if gold[s.stack[0]] == s.i:
@@ -59,11 +71,17 @@ cdef int _left_cost(const State* s, int* gold) except -1:
 
     cost += head_in_buffer(s, s.stack[0], gold)
     cost += children_in_buffer(s, s.stack[0], gold)
+    if NON_MONOTONIC and s.stack_len >= 2:
+        cost += gold[s.stack[0]] == s.stack[-1]
     return cost
 
 
-cdef int _reduce_cost(const State* s, int* gold) except -1:
-    return children_in_buffer(s, s.stack[0], gold)
+cdef int _reduce_cost(const State* s, const int* gold) except -1:
+    # Gold children of S0 left in the buffer, plus (non-monotonic only) its head.
+    cdef int lost = children_in_buffer(s, s.stack[0], gold)
+    if NON_MONOTONIC:
+        lost += head_in_buffer(s, s.stack[0], gold)
+    return lost
 
 
 cdef class TransitionSystem:
@@ -73,38 +91,40 @@ cdef class TransitionSystem:
         right_labels.sort()
         if 'ROOT' in right_labels:
             right_labels.pop(right_labels.index('ROOT'))
-        if 'dep' in right_labels:
-            right_labels.pop(right_labels.index('dep'))
         if 'ROOT' in left_labels:
             left_labels.pop(left_labels.index('ROOT'))
-        if 'dep' in left_labels:
-            left_labels.pop(left_labels.index('dep'))
         self.n_moves = 2 + len(left_labels) + len(right_labels) 
         moves = <Transition*>self.mem.alloc(self.n_moves, sizeof(Transition))
         cdef int i = 0
         moves[i].move = SHIFT
         moves[i].label = 0
+        moves[i].clas = i
         i += 1
         moves[i].move = REDUCE
         moves[i].label = 0
+        moves[i].clas = i
         i += 1
-        self.label_ids = {'ROOT': 0, 'dep': -1}
+        self.label_ids = {'ROOT': 0}
         cdef int label_id
         for label_str in left_labels:
             label_id = self.label_ids.setdefault(label_str, len(self.label_ids))
             moves[i].move = LEFT
             moves[i].label = label_id
+            moves[i].clas = i
             i += 1
         for label_str in right_labels:
             label_id = self.label_ids.setdefault(label_str, len(self.label_ids))
             moves[i].move = RIGHT
             moves[i].label = label_id
+            moves[i].clas = i
             i += 1
         self._moves = moves
 
-    cdef int transition(self, State *s, const int clas) except -1:
-        cdef const Transition* t = &self._moves[clas]
+    cdef int transition(self, State *s, const Transition* t) except -1:
         if t.move == SHIFT:
+            # Label the token being shifted (s.i), in case a later REDUCE attaches it
+            if NON_MONOTONIC:
+                s.sent[s.i].dep_tag = t.label
             push_stack(s)
         elif t.move == LEFT:
             add_dep(s, s.i, s.stack[0], t.label)
@@ -113,11 +133,12 @@ cdef class TransitionSystem:
             add_dep(s, s.stack[0], s.i, t.label)
             push_stack(s)
         elif t.move == REDUCE:
+            add_dep(s, s.stack[-1], s.stack[0], get_s0(s).dep_tag)
             pop_stack(s)
         else:
             raise StandardError(t.move)
 
-    cdef int best_valid(self, const weight_t* scores, const State* s) except -1:
+    cdef Transition best_valid(self, const weight_t* scores, const State* s) except *:
         cdef bint[N_MOVES] valid
         valid[SHIFT] = _can_shift(s)
         valid[LEFT] = _can_left(s)
@@ -126,59 +147,61 @@ cdef class TransitionSystem:
 
         cdef int best = -1
         cdef weight_t score = 0
+        cdef weight_t best_r_score = -9000
+        cdef int best_r_label = -1
         cdef int i
         for i in range(self.n_moves):
             if valid[self._moves[i].move] and (best == -1 or scores[i] > score):
                 best = i
                 score = scores[i]
+            if self._moves[i].move == RIGHT and scores[i] > best_r_score:
+                best_r_score, best_r_label = scores[i], self._moves[i].label  # keep the running max
         assert best >= 0
-        return best
+        cdef Transition t = self._moves[best]
+        t.score = score
+        if t.move == SHIFT:
+            t.label = best_r_label  # SHIFT carries the best-scoring RIGHT label
+        return t
 
-    cdef int best_gold(self, const weight_t* scores, const State* s,
-                       int* gold_heads, int* gold_labels) except -1:
+    cdef Transition best_gold(self, Transition* guess, const weight_t* scores,
+                              const State* s,
+                              const int* gold_heads, const int* gold_labels) except *:
+        # If we can create a gold dependency, only one action can be correct
         cdef int[N_MOVES] unl_costs
         unl_costs[SHIFT] = _shift_cost(s, gold_heads) if _can_shift(s) else -1
         unl_costs[LEFT] = _left_cost(s, gold_heads) if _can_left(s) else -1
         unl_costs[RIGHT] = _right_cost(s, gold_heads) if _can_right(s) else -1
         unl_costs[REDUCE] = _reduce_cost(s, gold_heads) if _can_reduce(s) else -1
 
-        cdef int cost
-        cdef int move
-        cdef int label
+        guess.cost = unl_costs[guess.move]
+        cdef Transition t
+        cdef int target_label
+        cdef int i
+        if gold_heads[s.stack[0]] == s.i:
+            target_label = gold_labels[s.stack[0]]
+            if guess.move == LEFT:
+                guess.cost += guess.label != target_label
+            for i in range(self.n_moves):
+                t = self._moves[i]
+                if t.move == LEFT and t.label == target_label:
+                    return t
+        elif gold_heads[s.i] == s.stack[0]:
+            target_label = gold_labels[s.i]
+            if guess.move == RIGHT:
+                guess.cost += guess.label != target_label
+            for i in range(self.n_moves):
+                t = self._moves[i]
+                if t.move == RIGHT and t.label == target_label:
+                    return t
+
         cdef int best = -1
         cdef weight_t score = -9000
-        cdef int i
         for i in range(self.n_moves):
-            move = self._moves[i].move
-            label = self._moves[i].label
-            if unl_costs[move] == 0: 
-                if move == SHIFT or move == REDUCE:
-                    cost = 0
-                elif move == LEFT:
-                    if gold_heads[s.stack[0]] == s.i and gold_labels[s.stack[0]] != -1:
-                        cost = label != gold_labels[s.stack[0]]
-                    else:
-                        cost = 0
-                elif move == RIGHT:
-                    if gold_heads[s.i] == s.stack[0] and gold_labels[s.i] != -1:
-                        cost = label != gold_labels[s.i]
-                    else:
-                        cost = 0
-                else:
-                    raise StandardError("Unknown Move")
-                if cost == 0 and (best == -1 or scores[i] > score):
-                    best = i
-                    score = scores[i]
- 
-        if best < 0:
-            print unl_costs[SHIFT], unl_costs[REDUCE], unl_costs[LEFT], unl_costs[RIGHT]
-            print s.stack_len
-            print has_head(get_s0(s))
-            print s.sent[s.stack[0]].head
-            print s.stack[0], s.i
-            print gold_heads[s.stack[0]], gold_heads[s.i]
-            print gold_labels[s.i]
-            print children_in_buffer(s, s.stack[0], gold_heads)
-            print head_in_buffer(s, s.stack[0], gold_heads)
-            raise StandardError 
-        return best
+            t = self._moves[i]
+            if unl_costs[t.move] == 0 and (best == -1 or scores[i] > score):
+                best = i
+                score = scores[i]
+        assert best >= 0  # a zero-cost move must exist; check before indexing _moves
+        t = self._moves[best]
+        t.score = score
+        return t