From 495f528709f2988b8d62d634c3ba4f3a3f3f60cf Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Wed, 10 Jun 2015 12:33:55 +0200 Subject: [PATCH] * Add support for sentence breaks in stateclass --- spacy/syntax/stateclass.pxd | 4 ++-- spacy/syntax/stateclass.pyx | 12 ++++++++---- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/spacy/syntax/stateclass.pxd b/spacy/syntax/stateclass.pxd index e94e74f0c..dcc57474c 100644 --- a/spacy/syntax/stateclass.pxd +++ b/spacy/syntax/stateclass.pxd @@ -16,10 +16,10 @@ cdef class StateClass: cdef Entity* _ents cdef TokenC _empty_token cdef int length - cdef bint at_sent_end cdef int _s_i cdef int _b_i cdef int _e_i + cdef int _break @staticmethod cdef inline StateClass init(const TokenC* sent, int length): @@ -93,6 +93,6 @@ cdef class StateClass: cdef void set_ent_tag(self, int i, int ent_iob, int ent_type) nogil - cdef void set_sent_end(self, int i) nogil + cdef void set_break(self, int i) nogil cdef void clone(self, StateClass src) nogil diff --git a/spacy/syntax/stateclass.pyx b/spacy/syntax/stateclass.pyx index be3ccf5aa..1e4f3b3f0 100644 --- a/spacy/syntax/stateclass.pyx +++ b/spacy/syntax/stateclass.pyx @@ -14,6 +14,7 @@ cdef class StateClass: self._ents = mem.alloc(length, sizeof(Entity)) self.mem = mem self.length = length + self._break = length self._s_i = 0 self._b_i = 0 self._e_i = 0 @@ -104,10 +105,10 @@ cdef class StateClass: return self._s_i <= 0 cdef bint eol(self) nogil: - return self._b_i >= self.length or self.at_sent_end + return self._b_i >= self._break cdef bint is_final(self) nogil: - return self.stack_depth() <= 1 and self.buffer_length() == 0 + return self.stack_depth() <= 1 and self._b_i >= self.length cdef bint has_head(self, int i) nogil: return self.safe_get(i).head != 0 @@ -130,12 +131,14 @@ cdef class StateClass: return self._s_i cdef int buffer_length(self) nogil: - return self.length - self._b_i + return self._break - self._b_i cdef void push(self) nogil: self._stack[self._s_i] = self.B(0) self._s_i += 1 self._b_i += 1 + if self._b_i >= self._break: + self._break = self.length cdef void pop(self) nogil: self._s_i -= 1 @@ -182,9 +185,10 @@ cdef class StateClass: self._sent[i].ent_iob = ent_iob self._sent[i].ent_type = ent_type - cdef void set_sent_end(self, int i) nogil: + cdef void set_break(self, int i) nogil: if 0 <= i < self.length: self._sent[i].sent_end = True + self._break = i cdef void clone(self, StateClass src) nogil: memcpy(self._sent, src._sent, self.length * sizeof(TokenC))