mirror of
https://github.com/explosion/spaCy.git
synced 2025-03-03 19:08:06 +03:00
* Add unshift action to StateClass, and track which tokens have been shifted
This commit is contained in:
parent
f7c8069e65
commit
7bf6b7de3e
|
@ -11,10 +11,12 @@ cdef class StateClass:
|
||||||
cdef Pool mem
|
cdef Pool mem
|
||||||
cdef int* _stack
|
cdef int* _stack
|
||||||
cdef int* _buffer
|
cdef int* _buffer
|
||||||
|
cdef bint* shifted
|
||||||
cdef TokenC* _sent
|
cdef TokenC* _sent
|
||||||
cdef Entity* _ents
|
cdef Entity* _ents
|
||||||
cdef TokenC _empty_token
|
cdef TokenC _empty_token
|
||||||
cdef int length
|
cdef int length
|
||||||
|
cdef bint at_sent_end
|
||||||
cdef int _s_i
|
cdef int _s_i
|
||||||
cdef int _b_i
|
cdef int _b_i
|
||||||
cdef int _e_i
|
cdef int _e_i
|
||||||
|
@ -64,7 +66,6 @@ cdef class StateClass:
|
||||||
cdef bint is_final(self) nogil
|
cdef bint is_final(self) nogil
|
||||||
|
|
||||||
cdef bint has_head(self, int i) nogil
|
cdef bint has_head(self, int i) nogil
|
||||||
|
|
||||||
|
|
||||||
cdef int n_L(self, int i) nogil
|
cdef int n_L(self, int i) nogil
|
||||||
|
|
||||||
|
@ -79,6 +80,8 @@ cdef class StateClass:
|
||||||
cdef void push(self) nogil
|
cdef void push(self) nogil
|
||||||
|
|
||||||
cdef void pop(self) nogil
|
cdef void pop(self) nogil
|
||||||
|
|
||||||
|
cdef void unshift(self) nogil
|
||||||
|
|
||||||
cdef void add_arc(self, int head, int child, int label) nogil
|
cdef void add_arc(self, int head, int child, int label) nogil
|
||||||
|
|
||||||
|
|
|
@ -9,6 +9,7 @@ cdef class StateClass:
|
||||||
cdef Pool mem = Pool()
|
cdef Pool mem = Pool()
|
||||||
self._buffer = <int*>mem.alloc(length, sizeof(int))
|
self._buffer = <int*>mem.alloc(length, sizeof(int))
|
||||||
self._stack = <int*>mem.alloc(length, sizeof(int))
|
self._stack = <int*>mem.alloc(length, sizeof(int))
|
||||||
|
self.shifted = <bint*>mem.alloc(length, sizeof(bint))
|
||||||
self._sent = <TokenC*>mem.alloc(length, sizeof(TokenC))
|
self._sent = <TokenC*>mem.alloc(length, sizeof(TokenC))
|
||||||
self._ents = <Entity*>mem.alloc(length, sizeof(Entity))
|
self._ents = <Entity*>mem.alloc(length, sizeof(Entity))
|
||||||
self.mem = mem
|
self.mem = mem
|
||||||
|
@ -103,10 +104,10 @@ cdef class StateClass:
|
||||||
return self._s_i <= 0
|
return self._s_i <= 0
|
||||||
|
|
||||||
cdef bint eol(self) nogil:
|
cdef bint eol(self) nogil:
|
||||||
return self._b_i >= self.length
|
return self._b_i >= self.length or self.at_sent_end
|
||||||
|
|
||||||
cdef bint is_final(self) nogil:
|
cdef bint is_final(self) nogil:
|
||||||
return self.eol() and self.stack_depth() <= 1
|
return self.stack_depth() <= 1 and self.buffer_length() == 0
|
||||||
|
|
||||||
cdef bint has_head(self, int i) nogil:
|
cdef bint has_head(self, int i) nogil:
|
||||||
return self.safe_get(i).head != 0
|
return self.safe_get(i).head != 0
|
||||||
|
@ -133,12 +134,18 @@ cdef class StateClass:
|
||||||
|
|
||||||
cdef void push(self) nogil:
|
cdef void push(self) nogil:
|
||||||
self._stack[self._s_i] = self.B(0)
|
self._stack[self._s_i] = self.B(0)
|
||||||
|
self.shifted[self.B(0)] = True
|
||||||
self._s_i += 1
|
self._s_i += 1
|
||||||
self._b_i += 1
|
self._b_i += 1
|
||||||
|
|
||||||
cdef void pop(self) nogil:
|
cdef void pop(self) nogil:
|
||||||
self._s_i -= 1
|
self._s_i -= 1
|
||||||
|
|
||||||
|
cdef void unshift(self) nogil:
|
||||||
|
self._b_i -= 1
|
||||||
|
self._buffer[self._b_i] = self.S(0)
|
||||||
|
self._s_i -= 1
|
||||||
|
|
||||||
cdef void add_arc(self, int head, int child, int label) nogil:
|
cdef void add_arc(self, int head, int child, int label) nogil:
|
||||||
if self.has_head(child):
|
if self.has_head(child):
|
||||||
self.del_arc(self.H(child), child)
|
self.del_arc(self.H(child), child)
|
||||||
|
@ -190,12 +197,12 @@ cdef class StateClass:
|
||||||
|
|
||||||
def print_state(self, words):
|
def print_state(self, words):
|
||||||
words = list(words) + ['_']
|
words = list(words) + ['_']
|
||||||
top = words[self.S(0)] + '_%d' % self.H(self.S(0))
|
top = words[self.S(0)] + '_%d' % self.S_(0).head
|
||||||
second = words[self.S(1)] + '_%d' % self.H(self.S(1))
|
second = words[self.S(1)] + '_%d' % self.S_(1).head
|
||||||
third = words[self.S(2)] + '_%d' % self.H(self.S(2))
|
third = words[self.S(2)] + '_%d' % self.S_(2).head
|
||||||
n0 = words[self.B(0)]
|
n0 = words[self.B(0)]
|
||||||
n1 = words[self.B(1)]
|
n1 = words[self.B(1)]
|
||||||
return ' '.join((str(self.stack_depth()), third, second, top, '|', n0, n1))
|
return ' '.join((str(self.buffer_length()), str(self.stack_depth()), third, second, top, '|', n0, n1))
|
||||||
|
|
||||||
|
|
||||||
# From https://en.wikipedia.org/wiki/Hamming_weight
|
# From https://en.wikipedia.org/wiki/Hamming_weight
|
||||||
|
|
Loading…
Reference in New Issue
Block a user