mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 18:26:30 +03:00
Fix bug in sentence starts for non-projective parses
The `set_children_from_heads` function assumed that parse trees were projective. However, non-projective parses may be passed in during deserialization or after deprojectivising. This caused incorrect sentence boundaries to be set for non-projective parses. Closes #2772.
This commit is contained in:
parent
48fd36bf05
commit
1759abf1e5
|
@ -2,7 +2,6 @@
|
|||
import pytest
|
||||
from ..util import get_doc
|
||||
|
||||
@pytest.mark.xfail
|
||||
def test_issue2772(en_vocab):
|
||||
words = 'When we write or communicate virtually , we can hide our true feelings .'.split()
|
||||
# A tree with a non-projective (i.e. crossing) arc
|
||||
|
|
|
@ -993,6 +993,8 @@ cdef int set_children_from_heads(TokenC* tokens, int length) except -1:
|
|||
tokens[i].r_kids = 0
|
||||
tokens[i].l_edge = i
|
||||
tokens[i].r_edge = i
|
||||
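# Run both edge-setting passes twice: with crossing (non-projective) arcs, a
# single pass may not propagate a child's updated edges up to its head.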
|
||||
for _ in range(2):
|
||||
# Set left edges
|
||||
for i in range(length):
|
||||
child = &tokens[i]
|
||||
|
@ -1001,7 +1003,8 @@ cdef int set_children_from_heads(TokenC* tokens, int length) except -1:
|
|||
head.l_kids += 1
|
||||
if child.l_edge < head.l_edge:
|
||||
head.l_edge = child.l_edge
|
||||
|
||||
if child.r_edge > head.r_edge:
|
||||
head.r_edge = child.r_edge
|
||||
# Set right edges --- same as above, but iterate in reverse
|
||||
for i in range(length-1, -1, -1):
|
||||
child = &tokens[i]
|
||||
|
@ -1010,8 +1013,8 @@ cdef int set_children_from_heads(TokenC* tokens, int length) except -1:
|
|||
head.r_kids += 1
|
||||
if child.r_edge > head.r_edge:
|
||||
head.r_edge = child.r_edge
|
||||
|
||||
|
||||
if child.l_edge < head.l_edge:
|
||||
head.l_edge = child.l_edge
|
||||
# Set sentence starts
|
||||
for i in range(length):
|
||||
if tokens[i].head == 0 and tokens[i].dep != 0:
|
||||
|
|
Loading…
Reference in New Issue
Block a user