WIP on split parsing

This commit is contained in:
Matthew Honnibal 2018-03-28 01:44:05 +02:00
parent de9fd091ac
commit d399843576
2 changed files with 21 additions and 1 deletions

View File

@ -319,6 +319,21 @@ cdef cppclass StateC:
if this._b_i > this._break:
this._break = -1
void split(int i, int n) nogil:
    '''Open up n extra token slots directly after token i.

    After the call, token i and the n slots that follow it stand for the
    pieces of the original token, and the sentence is n tokens longer.

    i: index into this._sent of the token being split.
    n: number of additional slots to insert after token i.

    Calls with n <= 0 or with i outside [0, length) are ignored. If the
    buffer cannot be grown, the state is left unchanged.
    '''
    # Worked example with a length 10 sentence:
    #   state.split(5, 2)
    #   Before: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    #   After:  [0, 1, 2, 3, 4, 5.0, 5.1, 5.2, 6, 7, 8, 9]
    #   Sentence grows to length 12.
    if n <= 0 or i < 0 or i >= this.length:
        return
    cdef int old_length = this.length
    # this._sent points PADDING entries past the start of the allocation,
    # so step back to the real base pointer before handing it to realloc.
    cdef TokenC* grown = <TokenC*>realloc(
        this._sent - PADDING,
        (old_length + n + (PADDING * 2)) * sizeof(TokenC))
    if grown == NULL:
        # realloc leaves the old allocation intact on failure; keep using it.
        return
    this._sent = grown + PADDING
    this.length = old_length + n
    # Words 6-9 move to positions 8-11. The trailing padding entries are
    # carried along so they still sit right after the last token.
    memmove(&this._sent[i + 1 + n], &this._sent[i + 1],
            ((old_length - i - 1) + PADDING) * sizeof(TokenC))
    # Words 0-5 stay where they are.
    # NOTE(review): slots i+1 .. i+n still hold stale copies of the tokens
    # that used to live there and are not initialised here. Any other
    # per-token arrays sized from this.length are not resized either --
    # confirm against the rest of the class.
void pop() nogil:
if this._s_i >= 1:
this._s_i -= 1

View File

@ -122,6 +122,8 @@ cdef class Shift:
@staticmethod
cdef int transition(StateC* st, attr_t label) nogil:
# Apply the Shift action to the parser state `st`.
# A non-zero label first requests a split: st.B(1) is passed as the token
# index and label as the count `n` of StateC.split(int i, int n).
# NOTE(review): label is an attr_t but split() takes an int -- confirm the
# value range; also confirm st.B(1) is always a valid token index here.
if label != 0:
st.split(st.B(1), label)
# The split (if any) happens before the push and fast_forward calls.
st.push()
st.fast_forward()
@ -135,7 +137,10 @@ cdef class Shift:
@staticmethod
cdef inline weight_t label_cost(StateClass s, const GoldParseC* gold, attr_t label) nogil:
    '''Cost of choosing `label` for a Shift in state `s`.

    Returns 0 when `label` equals the gold fused-token value stored for
    token s.B(1), and 1 otherwise.
    '''
    # NOTE(review): s.B(1) is used directly as an index into
    # gold.fused_tokens -- confirm it cannot be out of range here.
    if gold.fused_tokens[s.B(1)] == label:
        return 0
    else:
        return 1
cdef class Reduce: