mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-27 18:36:36 +03:00
* Add support for sentence breaks in stateclass
This commit is contained in:
parent
b7b18c279d
commit
495f528709
|
@ -16,10 +16,10 @@ cdef class StateClass:
|
||||||
cdef Entity* _ents
|
cdef Entity* _ents
|
||||||
cdef TokenC _empty_token
|
cdef TokenC _empty_token
|
||||||
cdef int length
|
cdef int length
|
||||||
cdef bint at_sent_end
|
|
||||||
cdef int _s_i
|
cdef int _s_i
|
||||||
cdef int _b_i
|
cdef int _b_i
|
||||||
cdef int _e_i
|
cdef int _e_i
|
||||||
|
cdef int _break
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
cdef inline StateClass init(const TokenC* sent, int length):
|
cdef inline StateClass init(const TokenC* sent, int length):
|
||||||
|
@ -93,6 +93,6 @@ cdef class StateClass:
|
||||||
|
|
||||||
cdef void set_ent_tag(self, int i, int ent_iob, int ent_type) nogil
|
cdef void set_ent_tag(self, int i, int ent_iob, int ent_type) nogil
|
||||||
|
|
||||||
cdef void set_sent_end(self, int i) nogil
|
cdef void set_break(self, int i) nogil
|
||||||
|
|
||||||
cdef void clone(self, StateClass src) nogil
|
cdef void clone(self, StateClass src) nogil
|
||||||
|
|
|
@ -14,6 +14,7 @@ cdef class StateClass:
|
||||||
self._ents = <Entity*>mem.alloc(length, sizeof(Entity))
|
self._ents = <Entity*>mem.alloc(length, sizeof(Entity))
|
||||||
self.mem = mem
|
self.mem = mem
|
||||||
self.length = length
|
self.length = length
|
||||||
|
self._break = length
|
||||||
self._s_i = 0
|
self._s_i = 0
|
||||||
self._b_i = 0
|
self._b_i = 0
|
||||||
self._e_i = 0
|
self._e_i = 0
|
||||||
|
@ -104,10 +105,10 @@ cdef class StateClass:
|
||||||
return self._s_i <= 0
|
return self._s_i <= 0
|
||||||
|
|
||||||
cdef bint eol(self) nogil:
|
cdef bint eol(self) nogil:
|
||||||
return self._b_i >= self.length or self.at_sent_end
|
return self._b_i >= self._break
|
||||||
|
|
||||||
cdef bint is_final(self) nogil:
|
cdef bint is_final(self) nogil:
|
||||||
return self.stack_depth() <= 1 and self.buffer_length() == 0
|
return self.stack_depth() <= 1 and self._b_i >= self.length
|
||||||
|
|
||||||
cdef bint has_head(self, int i) nogil:
|
cdef bint has_head(self, int i) nogil:
|
||||||
return self.safe_get(i).head != 0
|
return self.safe_get(i).head != 0
|
||||||
|
@ -130,12 +131,14 @@ cdef class StateClass:
|
||||||
return self._s_i
|
return self._s_i
|
||||||
|
|
||||||
cdef int buffer_length(self) nogil:
|
cdef int buffer_length(self) nogil:
|
||||||
return self.length - self._b_i
|
return self._break - self._b_i
|
||||||
|
|
||||||
cdef void push(self) nogil:
|
cdef void push(self) nogil:
|
||||||
self._stack[self._s_i] = self.B(0)
|
self._stack[self._s_i] = self.B(0)
|
||||||
self._s_i += 1
|
self._s_i += 1
|
||||||
self._b_i += 1
|
self._b_i += 1
|
||||||
|
if self._b_i >= self._break:
|
||||||
|
self._break = self.length
|
||||||
|
|
||||||
cdef void pop(self) nogil:
|
cdef void pop(self) nogil:
|
||||||
self._s_i -= 1
|
self._s_i -= 1
|
||||||
|
@ -182,9 +185,10 @@ cdef class StateClass:
|
||||||
self._sent[i].ent_iob = ent_iob
|
self._sent[i].ent_iob = ent_iob
|
||||||
self._sent[i].ent_type = ent_type
|
self._sent[i].ent_type = ent_type
|
||||||
|
|
||||||
cdef void set_sent_end(self, int i) nogil:
|
cdef void set_break(self, int i) nogil:
|
||||||
if 0 <= i < self.length:
|
if 0 <= i < self.length:
|
||||||
self._sent[i].sent_end = True
|
self._sent[i].sent_end = True
|
||||||
|
self._break = i
|
||||||
|
|
||||||
cdef void clone(self, StateClass src) nogil:
|
cdef void clone(self, StateClass src) nogil:
|
||||||
memcpy(self._sent, src._sent, self.length * sizeof(TokenC))
|
memcpy(self._sent, src._sent, self.length * sizeof(TokenC))
|
||||||
|
|
Loading…
Reference in New Issue
Block a user