* Fix L/R edge bug by ensuring l_edge and r_edge are preset, and by fixing the way the edges are updated in del_arc. Bugs keep arising here because the edges are absolute positions, whereas everything else is relative. I'm also not 100% convinced that del_arc is handled correctly. Do we need to update the parents?

This commit is contained in:
Matthew Honnibal 2015-09-09 03:39:46 +02:00
parent 83d1a1e512
commit 0e24d099a1
2 changed files with 10 additions and 5 deletions

View File

@ -16,12 +16,11 @@ cdef class StateClass:
cdef int i cdef int i
for i in range(length + (PADDING * 2)): for i in range(length + (PADDING * 2)):
self._ents[i].end = -1 self._ents[i].end = -1
self._sent[i].l_edge = i
self._sent[i].r_edge = i
for i in range(length, length + (PADDING * 2)): for i in range(length, length + (PADDING * 2)):
self._sent[i].lex = &EMPTY_LEXEME self._sent[i].lex = &EMPTY_LEXEME
self._sent += PADDING self._sent += PADDING
for i in range(length):
self._sent[i].l_edge = i
self._sent[i].r_edge = i
self._ents += PADDING self._ents += PADDING
self._buffer += PADDING self._buffer += PADDING
self._stack += PADDING self._stack += PADDING
@ -162,11 +161,11 @@ cdef class StateClass:
cdef int dist = h_i - c_i cdef int dist = h_i - c_i
cdef TokenC* h = &self._sent[h_i] cdef TokenC* h = &self._sent[h_i]
if c_i > h_i: if c_i > h_i:
h.r_edge = self.R_(h_i, 2).r_edge if h.r_kids >= 2 else h_i
h.r_kids -= 1 h.r_kids -= 1
h.r_edge = self.R_(h_i, 2).r_edge if h.r_kids >= 1 else h_i
else: else:
h.l_edge = self.L_(h_i, 2).l_edge if h.l_kids >= 2 else h_i
h.l_kids -= 1 h.l_kids -= 1
h.l_edge = self.L_(h_i, 2).l_edge if h.l_kids >= 1 else h_i
cdef void open_ent(self, int label) nogil: cdef void open_ent(self, int label) nogil:
self._ents[self._e_i].start = self.B(0) self._ents[self._e_i].start = self.B(0)

View File

@ -67,6 +67,8 @@ cdef class Doc:
cdef int i cdef int i
for i in range(size + (PADDING*2)): for i in range(size + (PADDING*2)):
data_start[i].lex = &EMPTY_LEXEME data_start[i].lex = &EMPTY_LEXEME
data_start[i].l_edge = i
data_start[i].r_edge = i
self.data = data_start + PADDING self.data = data_start + PADDING
self.max_length = size self.max_length = size
self.length = 0 self.length = 0
@ -219,6 +221,8 @@ cdef class Doc:
t.idx = 0 t.idx = 0
else: else:
t.idx = (t-1).idx + (t-1).lex.length + (t-1).spacy t.idx = (t-1).idx + (t-1).lex.length + (t-1).spacy
t.l_edge = self.length
t.r_edge = self.length
assert t.lex.orth != 0 assert t.lex.orth != 0
t.spacy = has_space t.spacy = has_space
self.length += 1 self.length += 1
@ -310,6 +314,8 @@ cdef class Doc:
self.is_parsed = True self.is_parsed = True
for i in range(self.length): for i in range(self.length):
self.data[i] = parsed[i] self.data[i] = parsed[i]
assert self.data[i].l_edge <= i
assert self.data[i].r_edge >= i
def from_array(self, attrs, array): def from_array(self, attrs, array):
cdef int i, col cdef int i, col