mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-13 13:17:06 +03:00
1759abf1e5
The set_children_from_heads function assumed parse trees were projective. However, non-projective parses may be passed in during deserialization, or after deprojectivising. This caused incorrect sentence boundaries to be set for non-projective parses. Close #2772.
13 lines
529 B
Python
13 lines
529 B
Python
'''Test that deprojectivization doesn't mess up sentence boundaries.'''
|
|
import pytest
|
|
from ..util import get_doc
|
|
|
|
def test_issue2772(en_vocab):
    '''Regression test for issue #2772.

    Builds a doc from a parse containing crossing (non-projective) arcs and
    checks that the sentence-start flag of the second token is left as None.
    '''
    sentence = 'When we write or communicate virtually , we can hide our true feelings .'
    words = sentence.split()
    # A tree with a non-projective (i.e. crossing) arc.
    # The arcs (0, 4) and (2, 9) cross; the head values below read as offsets
    # from each token's own index (0 + 4 = 4 and 2 + 7 = 9).
    # NOTE(review): the sentence splits into 14 tokens but only 13 heads are
    # listed, so the final '.' gets no explicit head/dep here -- confirm how
    # get_doc treats the unmatched token.
    heads = [4, 1, 7, -1, -2, -1, 3, 2, 1, 0, -1, -2, -1]
    # One placeholder dependency label per head entry.
    deps = ['dep' for _ in heads]
    doc = get_doc(en_vocab, words=words, heads=heads, deps=deps)
    second_token = doc[1]
    assert second_token.is_sent_start is None
|