# spaCy/spacy/tests/regression/test_issue2396.py

# coding: utf-8
from __future__ import unicode_literals

from ..util import get_doc

import pytest
import numpy

@pytest.mark.parametrize('sentence,heads,matrix', [
    (
        'She created a test for spacy',
        # Per-token head values handed to get_doc (presumably offsets
        # relative to each token's own position -- confirm against get_doc).
        [1, 0, 1, -2, -1, -1],
        # Expected result of get_lca_matrix(): one row and one column per
        # token of the sentence above.
        numpy.array([
            [0, 1, 1, 1, 1, 1],
            [1, 1, 1, 1, 1, 1],
            [1, 1, 2, 3, 3, 3],
            [1, 1, 3, 3, 3, 3],
            [1, 1, 3, 3, 4, 4],
            [1, 1, 3, 3, 4, 5]], dtype=numpy.int32)
    )
    ])
def test_issue2396(en_tokenizer, sentence, heads, matrix):
    """Regression test for issue 2396: check ``get_lca_matrix`` output.

    Builds a doc from ``sentence`` with the given ``heads`` and verifies that
    both the doc and the span covering the whole doc return ``matrix`` from
    ``get_lca_matrix()``.

    en_tokenizer: tokenizer fixture used to split ``sentence`` into tokens.
    sentence: text to tokenize.
    heads: head values passed through to ``get_doc``.
    matrix: expected ``numpy`` array returned by ``get_lca_matrix()``.
    """
    tokens = en_tokenizer(sentence)
    doc = get_doc(tokens.vocab, [t.text for t in tokens], heads=heads)
    # Full-document slice, so the span result must match the doc result.
    span = doc[:]
    # array_equal requires identical shape as well as identical elements;
    # a plain `==` would broadcast and could let a wrongly-shaped result pass.
    assert numpy.array_equal(doc.get_lca_matrix(), matrix)
    assert numpy.array_equal(span.get_lca_matrix(), matrix)