mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
* Fix runtime error bug that arose from updated Span.root function.
This commit is contained in:
parent
2c8dd91785
commit
87172a15c6
|
@ -161,3 +161,21 @@ def test_merge_hang():
|
|||
doc.from_array([HEAD], heads.T)
|
||||
doc.merge(18, 32, '', '', 'ORG')
|
||||
doc.merge(8, 32, '', '', 'ORG')
|
||||
|
||||
|
||||
@pytest.mark.models
def test_runtime_error(EN):
    """Regression test: merging trimmed noun chunks must not raise.

    The text below is an example that caused a run-time error while
    parsing Reddit. Each noun chunk is trimmed from the left, the
    surviving multi-token chunks are recorded as merge arguments, and
    every recorded span is then merged back into the document. The test
    passes when no exception is raised; it makes no assertions.

    Args:
        EN: the English pipeline fixture; called on the raw text to
            produce the parsed document.
    """
    text = u'67% of black households are single parent \n\n72% of all black babies born out of wedlock \n\n50% of all black kids don\u2019t finish high school'
    doc = EN(text)

    # Dependency labels at which left-trimming of a chunk stops.
    stop_labels = ('advmod', 'amod', 'compound')

    merge_args = []
    for chunk in doc.noun_chunks:
        # Strip leading tokens until the first token carries one of the
        # stop labels, or until only a single token is left.
        while len(chunk) > 1:
            if chunk[0].dep_ in stop_labels:
                break
            chunk = chunk[1:]
        # Single-token chunks are not worth merging.
        if len(chunk) <= 1:
            continue
        merge_args.append((
            chunk.start_char,
            chunk.end_char,
            chunk.root.tag_,
            chunk.text,
            chunk.root.ent_type_,
        ))

    for args in merge_args:
        # Dump the pending merge and the current parse state so a failure
        # can be diagnosed from the captured test output.
        print(args)
        for token in doc:
            print(token.idx, token.text, token.head.i, token.head.text)
        doc.merge(*args)
|
||||
|
|
|
@ -190,12 +190,6 @@ cdef class Span:
|
|||
for i in range(self.start, self.end):
|
||||
if self.start <= (i+self.doc.c[i].head) < self.end:
|
||||
continue
|
||||
# Don't allow punctuation or spaces to be the root, if there are
|
||||
# better candidates
|
||||
if root != -1 and Lexeme.c_check_flag(self.doc.c[i].lex, IS_PUNCT):
|
||||
continue
|
||||
if root != -1 and Lexeme.c_check_flag(self.doc.c[i].lex, IS_SPACE):
|
||||
continue
|
||||
words_to_root = _count_words_to_root(&self.doc.c[i], self.doc.length)
|
||||
if words_to_root < current_best:
|
||||
current_best = words_to_root
|
||||
|
@ -244,6 +238,11 @@ cdef class Span:
|
|||
|
||||
|
||||
cdef int _count_words_to_root(const TokenC* token, int sent_length) except -1:
|
||||
# Don't allow spaces to be the root, if there are
|
||||
# better candidates
|
||||
if Lexeme.c_check_flag(token.lex, IS_SPACE):
|
||||
return sent_length-1
|
||||
|
||||
cdef int n = 0
|
||||
while token.head != 0:
|
||||
token += token.head
|
||||
|
|
Loading…
Reference in New Issue
Block a user