Adjust the parsing transition system to allow preset sentence boundaries (tokens whose `sent_start` is already set to 1).

This commit is contained in:
Matthew Honnibal 2017-10-08 23:53:34 +02:00
parent 080afd4924
commit e938bce320
3 changed files with 9 additions and 5 deletions

View File

@@ -307,6 +307,8 @@ cdef cppclass StateC:
this._stack[this._s_i] = this.B(0) this._stack[this._s_i] = this.B(0)
this._s_i += 1 this._s_i += 1
this._b_i += 1 this._b_i += 1
if this.B_(0).sent_start == 1:
this.set_break(this.B(0))
if this._b_i > this._break: if this._b_i > this._break:
this._break = -1 this._break = -1
@@ -383,7 +385,7 @@ cdef cppclass StateC:
void set_break(int i) nogil: void set_break(int i) nogil:
if 0 <= i < this.length: if 0 <= i < this.length:
this._sent[i].sent_start = True this._sent[i].sent_start = 1
this._break = this._b_i this._break = this._b_i
void clone(const StateC* src) nogil: void clone(const StateC* src) nogil:

View File

@@ -118,7 +118,7 @@ cdef bint _is_gold_root(const GoldParseC* gold, int word) nogil:
cdef class Shift: cdef class Shift:
@staticmethod @staticmethod
cdef bint is_valid(const StateC* st, attr_t label) nogil: cdef bint is_valid(const StateC* st, attr_t label) nogil:
return st.buffer_length() >= 2 and not st.shifted[st.B(0)] and not st.B_(0).sent_start return st.buffer_length() >= 2 and not st.shifted[st.B(0)] and st.B_(0).sent_start != 1
@staticmethod @staticmethod
cdef int transition(StateC* st, attr_t label) nogil: cdef int transition(StateC* st, attr_t label) nogil:
@@ -178,7 +178,7 @@ cdef class Reduce:
cdef class LeftArc: cdef class LeftArc:
@staticmethod @staticmethod
cdef bint is_valid(const StateC* st, attr_t label) nogil: cdef bint is_valid(const StateC* st, attr_t label) nogil:
return not st.B_(0).sent_start return st.B_(0).sent_start != 1
@staticmethod @staticmethod
cdef int transition(StateC* st, attr_t label) nogil: cdef int transition(StateC* st, attr_t label) nogil:
@@ -212,7 +212,7 @@ cdef class LeftArc:
cdef class RightArc: cdef class RightArc:
@staticmethod @staticmethod
cdef bint is_valid(const StateC* st, attr_t label) nogil: cdef bint is_valid(const StateC* st, attr_t label) nogil:
return not st.B_(0).sent_start return st.B_(0).sent_start != 1
@staticmethod @staticmethod
cdef int transition(StateC* st, attr_t label) nogil: cdef int transition(StateC* st, attr_t label) nogil:
@@ -248,6 +248,8 @@ cdef class Break:
return False return False
elif st.stack_depth() < 1: elif st.stack_depth() < 1:
return False return False
elif st.B_(0).l_edge < 0:
return False
else: else:
return True return True

View File

@@ -485,7 +485,7 @@ cdef class Doc:
cdef int i cdef int i
start = 0 start = 0
for i in range(1, self.length): for i in range(1, self.length):
if self.c[i].sent_start: if self.c[i].sent_start == 1:
yield Span(self, start, i) yield Span(self, start, i)
start = i start = i
if start != self.length: if start != self.length: