spaCy/spacy/tests/regression/test_issue2772.py
Matthew Honnibal 1759abf1e5 Fix bug in sentence starts for non-projective parses
The set_children_from_heads function assumed parse trees were
projective. However, non-projective parses may be passed in during
deserialization, or after deprojectivising. This caused incorrect
sentence boundaries to be set for non-projective parses. Close #2772.
2018-09-19 14:50:06 +02:00

13 lines
529 B
Python

'''Test that deprojectivization doesn't mess up sentence boundaries.'''
import pytest
from ..util import get_doc
def test_issue2772(en_vocab):
    '''Regression test for issue #2772.

    Builds a doc whose dependency tree contains crossing (non-projective)
    arcs and checks that the second token has no sentence-start flag set.
    '''
    sentence = 'When we write or communicate virtually , we can hide our true feelings .'
    tokens = sentence.split()
    # The arcs (0, 4) and (2, 9) cross, which makes the tree non-projective.
    # The head values appear to be offsets relative to each token's own
    # position (0 + 4 = 4 and 2 + 7 = 9 match the arcs named above).
    # NOTE(review): the sentence splits into 14 tokens but only 13 heads are
    # listed, so the final '.' has no explicit head here -- confirm that
    # get_doc tolerates the shorter list as intended.
    head_offsets = [
        4, 1, 7, -1, -2, -1, 3,
        2, 1, 0, -1, -2, -1,
    ]
    # Every arc carries the same placeholder label.
    arc_labels = ['dep' for _ in head_offsets]
    doc = get_doc(en_vocab, words=tokens, heads=head_offsets, deps=arc_labels)
    second_token = doc[1]
    assert second_token.is_sent_start is None