* Add a loop guard in Span.root that raises an error if there is a cycle in the dependency parse, instead of entering an infinite loop. Re: Issue #214

This commit is contained in:
Matthew Honnibal 2016-01-16 11:53:37 +01:00
parent 478a79a3d5
commit 42a9f29b40

View File

@@ -164,8 +164,28 @@ cdef class Span:
cdef const TokenC* start = &self.doc.c[self.start] cdef const TokenC* start = &self.doc.c[self.start]
cdef const TokenC* end = &self.doc.c[self.end] cdef const TokenC* end = &self.doc.c[self.end]
head = start head = start
cdef int nr_iter = 0
while start <= (head + head.head) < end and head.head != 0: while start <= (head + head.head) < end and head.head != 0:
head += head.head head += head.head
# Guard against infinite loops
if nr_iter >= (self.doc.length+1):
# Retrieve the words without getting the Python tokens, to
# avoid potential problems
try:
words = [self.doc.vocab.strings[self.doc.c[i].lex.orth] for i
in range(self.doc.length)]
except:
words = '<Exception retrieving words!>'
try:
heads = [self.doc.c[i].head for i in range(self.doc.length)]
except:
heads = '<Exception retrieving heads!>'
raise RuntimeError(
"Invalid dependency parse, leading to potentially infinite loop. " +
"Please report this error on the issue tracker.\n" +
("Words: %s\n" % repr(words)) +
("Heads: %s\n" % repr(heads)))
nr_iter += 1
return self.doc[head - self.doc.c] return self.doc[head - self.doc.c]
property lefts: property lefts: