mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-27 01:34:30 +03:00
* Fix L/R edge bug by ensuring l_edge and r_edge are preset, and by fixing the way the edges are updated in del_arc. Bugs keep arising here because the edges are absolute positions, whereas everything else is relative. I'm also not 100% convinced that del_arc is handled correctly. Do we need to update the parents?
This commit is contained in:
parent
83d1a1e512
commit
0e24d099a1
|
@ -16,12 +16,11 @@ cdef class StateClass:
|
||||||
cdef int i
|
cdef int i
|
||||||
for i in range(length + (PADDING * 2)):
|
for i in range(length + (PADDING * 2)):
|
||||||
self._ents[i].end = -1
|
self._ents[i].end = -1
|
||||||
|
self._sent[i].l_edge = i
|
||||||
|
self._sent[i].r_edge = i
|
||||||
for i in range(length, length + (PADDING * 2)):
|
for i in range(length, length + (PADDING * 2)):
|
||||||
self._sent[i].lex = &EMPTY_LEXEME
|
self._sent[i].lex = &EMPTY_LEXEME
|
||||||
self._sent += PADDING
|
self._sent += PADDING
|
||||||
for i in range(length):
|
|
||||||
self._sent[i].l_edge = i
|
|
||||||
self._sent[i].r_edge = i
|
|
||||||
self._ents += PADDING
|
self._ents += PADDING
|
||||||
self._buffer += PADDING
|
self._buffer += PADDING
|
||||||
self._stack += PADDING
|
self._stack += PADDING
|
||||||
|
@ -162,11 +161,11 @@ cdef class StateClass:
|
||||||
cdef int dist = h_i - c_i
|
cdef int dist = h_i - c_i
|
||||||
cdef TokenC* h = &self._sent[h_i]
|
cdef TokenC* h = &self._sent[h_i]
|
||||||
if c_i > h_i:
|
if c_i > h_i:
|
||||||
|
h.r_edge = self.R_(h_i, 2).r_edge if h.r_kids >= 2 else h_i
|
||||||
h.r_kids -= 1
|
h.r_kids -= 1
|
||||||
h.r_edge = self.R_(h_i, 2).r_edge if h.r_kids >= 1 else h_i
|
|
||||||
else:
|
else:
|
||||||
|
h.l_edge = self.L_(h_i, 2).l_edge if h.l_kids >= 2 else h_i
|
||||||
h.l_kids -= 1
|
h.l_kids -= 1
|
||||||
h.l_edge = self.L_(h_i, 2).l_edge if h.l_kids >= 1 else h_i
|
|
||||||
|
|
||||||
cdef void open_ent(self, int label) nogil:
|
cdef void open_ent(self, int label) nogil:
|
||||||
self._ents[self._e_i].start = self.B(0)
|
self._ents[self._e_i].start = self.B(0)
|
||||||
|
|
|
@ -67,6 +67,8 @@ cdef class Doc:
|
||||||
cdef int i
|
cdef int i
|
||||||
for i in range(size + (PADDING*2)):
|
for i in range(size + (PADDING*2)):
|
||||||
data_start[i].lex = &EMPTY_LEXEME
|
data_start[i].lex = &EMPTY_LEXEME
|
||||||
|
data_start[i].l_edge = i
|
||||||
|
data_start[i].r_edge = i
|
||||||
self.data = data_start + PADDING
|
self.data = data_start + PADDING
|
||||||
self.max_length = size
|
self.max_length = size
|
||||||
self.length = 0
|
self.length = 0
|
||||||
|
@ -219,6 +221,8 @@ cdef class Doc:
|
||||||
t.idx = 0
|
t.idx = 0
|
||||||
else:
|
else:
|
||||||
t.idx = (t-1).idx + (t-1).lex.length + (t-1).spacy
|
t.idx = (t-1).idx + (t-1).lex.length + (t-1).spacy
|
||||||
|
t.l_edge = self.length
|
||||||
|
t.r_edge = self.length
|
||||||
assert t.lex.orth != 0
|
assert t.lex.orth != 0
|
||||||
t.spacy = has_space
|
t.spacy = has_space
|
||||||
self.length += 1
|
self.length += 1
|
||||||
|
@ -310,6 +314,8 @@ cdef class Doc:
|
||||||
self.is_parsed = True
|
self.is_parsed = True
|
||||||
for i in range(self.length):
|
for i in range(self.length):
|
||||||
self.data[i] = parsed[i]
|
self.data[i] = parsed[i]
|
||||||
|
assert self.data[i].l_edge <= i
|
||||||
|
assert self.data[i].r_edge >= i
|
||||||
|
|
||||||
def from_array(self, attrs, array):
|
def from_array(self, attrs, array):
|
||||||
cdef int i, col
|
cdef int i, col
|
||||||
|
|
Loading…
Reference in New Issue
Block a user