* Add unshift action to StateClass, and track which tokens have been shifted

2025-12-13 05:04:17 +03:00 · 2015-06-10 10:13:03 +02:00 · 2015-06-10 10:13:03 +02:00 · 7bf6b7de3e
commit 7bf6b7de3e
parent f7c8069e65
2 changed files with 17 additions and 7 deletions
--- a/spacy/syntax/stateclass.pxd
+++ b/spacy/syntax/stateclass.pxd
@ -11,10 +11,12 @@ cdef class StateClass:
    cdef Pool mem
    cdef int* _stack
    cdef int* _buffer
    cdef bint* shifted
    cdef TokenC* _sent
    cdef Entity* _ents
    cdef TokenC _empty_token
    cdef int length
    cdef bint at_sent_end
    cdef int _s_i
    cdef int _b_i
    cdef int _e_i
@ -64,7 +66,6 @@ cdef class StateClass:
    cdef bint is_final(self) nogil
    cdef bint has_head(self, int i) nogil
    cdef int n_L(self, int i) nogil
@ -79,6 +80,8 @@ cdef class StateClass:
    cdef void push(self) nogil
    cdef void pop(self) nogil
    cdef void unshift(self) nogil
    cdef void add_arc(self, int head, int child, int label) nogil
--- a/spacy/syntax/stateclass.pyx
+++ b/spacy/syntax/stateclass.pyx
@ -9,6 +9,7 @@ cdef class StateClass:
        cdef Pool mem = Pool()
        self._buffer = <int*>mem.alloc(length, sizeof(int))
        self._stack = <int*>mem.alloc(length, sizeof(int))
        self.shifted = <bint*>mem.alloc(length, sizeof(bint))
        self._sent = <TokenC*>mem.alloc(length, sizeof(TokenC))
        self._ents = <Entity*>mem.alloc(length, sizeof(Entity))
        self.mem = mem
@ -103,10 +104,10 @@ cdef class StateClass:
        return self._s_i <= 0
    cdef bint eol(self) nogil:
-        return self._b_i >= self.length
+        return self._b_i >= self.length or self.at_sent_end
    cdef bint is_final(self) nogil:
-        return self.eol() and self.stack_depth() <= 1
+        return self.stack_depth() <= 1 and self.buffer_length() == 0
    cdef bint has_head(self, int i) nogil:
        return self.safe_get(i).head != 0
@ -133,12 +134,18 @@ cdef class StateClass:
    cdef void push(self) nogil:
        # Shift step: place the item returned by B(0) on top of the stack and
        # consume it from the buffer.
        # NOTE(review): B() is defined outside this hunk; it is presumably the
        # index of the word at the front of the buffer -- confirm. No bounds
        # check is done here on the value B(0) returns before it is used as an
        # index into `shifted`.
        self._stack[self._s_i] = self.B(0)
        # Flag this item as having been shifted. B(0) is evaluated again here,
        # before _b_i moves, so it is read under the same _b_i as on the line above.
        self.shifted[self.B(0)] = True
        # Advance both cursors: one more entry on the stack, one fewer item
        # remaining in the buffer.
        self._s_i += 1
        self._b_i += 1
    cdef void pop(self) nogil:
        # Discard the top stack entry by decrementing the stack index. The
        # slot in _stack is not cleared, and no check for an empty stack is
        # made here.
        self._s_i -= 1
    cdef void unshift(self) nogil:
        # Undo the cursor movement of push(): return the top-of-stack item to
        # the front of the buffer.
        # Step the buffer index back one slot first, so the write below lands
        # in the slot that becomes the new buffer front.
        self._b_i -= 1
        # S(0) is read while _s_i still counts the item being moved.
        # NOTE(review): S() is defined outside this hunk; presumably it returns
        # the word index on top of the stack -- confirm.
        self._buffer[self._b_i] = self.S(0)
        # Remove the item from the stack. The `shifted` flag that push() set
        # for this item is not cleared here.
        self._s_i -= 1
    cdef void add_arc(self, int head, int child, int label) nogil:
        if self.has_head(child):
            self.del_arc(self.H(child), child)
@ -190,12 +197,12 @@ cdef class StateClass:
    def print_state(self, words):
        words = list(words) + ['_']
-        top = words[self.S(0)] + '_%d' % self.H(self.S(0))
+        top = words[self.S(0)] + '_%d' % self.S_(0).head
-        second = words[self.S(1)] + '_%d' % self.H(self.S(1))
+        second = words[self.S(1)] + '_%d' % self.S_(1).head
-        third = words[self.S(2)] + '_%d' % self.H(self.S(2))
+        third = words[self.S(2)] + '_%d' % self.S_(2).head
        n0 = words[self.B(0)] 
        n1 = words[self.B(1)] 
-        return ' '.join((str(self.stack_depth()), third, second, top, '|', n0, n1))
+        return ' '.join((str(self.buffer_length()), str(self.stack_depth()), third, second, top, '|', n0, n1))
 # From https://en.wikipedia.org/wiki/Hamming_weight