mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 02:06:31 +03:00
e03e1eee92
This PR adds a test for an untested case of `Span.get_lca_matrix`, and fixes a bug for that scenario, which I introduced in [this PR](https://github.com/explosion/spaCy/pull/3089) (sorry!).

## Description

The previous implementation of `get_lca_matrix` was failing for the case `doc[j:k].get_lca_matrix()` where `j > 0`. A test has been added for this case and the bug has been fixed.

### Types of change

Bug fix

## Checklist

- [x] I have submitted the spaCy Contributor Agreement.
- [x] I ran the tests, and all new and existing tests passed.
- [x] My changes don't require a change to the documentation, or if they do, I've added all required information.
30 lines
776 B
Python
30 lines
776 B
Python
# coding: utf-8
|
|
from __future__ import unicode_literals
|
|
|
|
from ..util import get_doc
|
|
|
|
import pytest
|
|
import numpy
|
|
|
|
@pytest.mark.parametrize('sentence,heads,matrix', [
    (
        'She created a test for spacy',
        # NOTE(review): presumably per-token head offsets relative to the
        # token's own position, as consumed by get_doc -- confirm against
        # the test utilities.
        [1, 0, 1, -2, -1, -1],
        # Expected result of get_lca_matrix() for the six tokens above.
        # Presumably entry [i][j] is the index of the lowest common ancestor
        # of tokens i and j -- confirm against the get_lca_matrix docs.
        numpy.array([
            [0, 1, 1, 1, 1, 1],
            [1, 1, 1, 1, 1, 1],
            [1, 1, 2, 3, 3, 3],
            [1, 1, 3, 3, 3, 3],
            [1, 1, 3, 3, 4, 4],
            [1, 1, 3, 3, 4, 5]], dtype=numpy.int32)
    )
])
def test_issue2396(en_tokenizer, sentence, heads, matrix):
    """Check get_lca_matrix() on a Doc and on a Span covering the whole Doc.

    The sentence is tokenized with the ``en_tokenizer`` fixture, a doc is
    built from the token texts and the given ``heads`` via ``get_doc``, and
    both ``doc.get_lca_matrix()`` and ``doc[:].get_lca_matrix()`` are required
    to equal ``matrix``.

    sentence: text to tokenize.
    heads: head annotation passed through to ``get_doc``.
    matrix: expected ``numpy`` array returned by ``get_lca_matrix()``.
    """
    tokens = en_tokenizer(sentence)
    words = [token.text for token in tokens]
    doc = get_doc(tokens.vocab, words, heads=heads)
    # A span over the full document must yield the same matrix as the doc.
    span = doc[:]
    # assert_array_equal verifies the shape as well as the values and reports
    # the mismatching elements on failure, whereas `(a == b).all()` broadcasts
    # and only ever reports a bare `assert False`.
    numpy.testing.assert_array_equal(doc.get_lca_matrix(), matrix)
    numpy.testing.assert_array_equal(span.get_lca_matrix(), matrix)
|
|
|
|
|