# spaCy/spacy/tests/regression/test_issue1799.py

'''Test sentence boundaries are deserialized correctly,
even for non-projective sentences.'''
from __future__ import unicode_literals

import pytest
import numpy
from ... tokens import Doc
from ... vocab import Vocab
from ... attrs import HEAD, DEP


def test_issue1799():
    '''Regression test for issue #1799.

    Builds a seven-token Doc, loads its HEAD and DEP columns with
    `Doc.from_array`, and asserts that iterating `doc.sents` yields
    exactly one sentence.
    '''
    # Pre-tokenized form of the problem sentence 'Just what I was looking for.'
    words = 'Just what I was looking for .'.split()
    # One [HEAD, DEP] row per token, stored as unsigned 64-bit integers.
    # The last two head values are 2**64 - 1 and 2**64 - 2, i.e. the uint64
    # bit patterns of -1 and -2.
    # NOTE(review): presumably heads are offsets relative to each token and
    # the DEP values are label IDs/hashes (the large one looks like the hash
    # of 'ROOT', which is added to the string store below) -- confirm against
    # Doc.from_array.
    heads_deps = numpy.asarray([[1, 397], [4, 436], [2, 426], [1, 402],
                                [0, 8206900633647566924], [18446744073709551615, 440],
                                [18446744073709551614, 442]], dtype='uint64')
    doc = Doc(Vocab(), words=words)
    doc.vocab.strings.add('ROOT')
    doc = doc.from_array([HEAD, DEP], heads_deps)
    assert len(list(doc.sents)) == 1
# Revision history (from git blame):
# - 2018-01-22 22:18:38 +03:00: Add test for #1799: Set left and right edges
#   (and thus sentences) in non-projective parses. (All lines except the
#   `from __future__ import unicode_literals` import.)
# - 2018-01-23 01:55:44 +03:00: Fix unicode import in test. (The
#   `from __future__ import unicode_literals` line.)