move preset_spans_suggester test to spancat tests

This commit is contained in:
kadarakos 2023-06-02 10:08:16 +00:00
parent 3ec1cb5e30
commit 9372b22d32
2 changed files with 17 additions and 18 deletions

View File

@ -1,6 +1,5 @@
import pytest
from thinc.api import Config
from thinc.types import Ragged
from spacy.language import Language
from spacy.lang.en import English
@ -193,23 +192,6 @@ def test_set_annotations_span_lengths(min_length, max_length, span_count):
assert all(min_length <= len(span) <= max_length for span in doc.spans[SPANS_KEY])
def test_span_finder_suggester():
    """Check the candidates returned by ``spacy.preset_spans_suggester.v1``.

    Despite the test's name, the suggester exercised here is the one
    registered as ``spacy.preset_spans_suggester.v1``. Two docs are annotated
    with one and two spans under ``SPANS_KEY``; the suggester is expected to
    return a ``Ragged`` holding one row per annotated span, grouped per doc.
    """
    nlp = Language()
    docs = [nlp("This is an example."), nlp("This is the second example.")]
    # Pre-populate the span groups the suggester is configured to read from.
    docs[0].spans[SPANS_KEY] = [docs[0][3:4]]
    docs[1].spans[SPANS_KEY] = [docs[1][0:4], docs[1][3:5]]
    suggester = registry.misc.get("spacy.preset_spans_suggester.v1")(
        spans_key=SPANS_KEY
    )
    candidates = suggester(docs)
    # isinstance is the idiomatic type check and also accepts subclasses.
    assert isinstance(candidates, Ragged)
    # One entry per doc.
    assert len(candidates) == 2
    # Rows match the slice bounds of the spans assigned above, in doc order.
    assert list(candidates.dataXd[0]) == [3, 4]
    assert list(candidates.dataXd[1]) == [0, 4]
    assert list(candidates.dataXd[2]) == [3, 5]
    # First doc contributed one span, second doc two.
    assert list(candidates.lengths) == [1, 2]
def test_overfitting_IO():
# Simple test to try and quickly overfit the span_finder component - ensuring the ML models work correctly
fix_random_seed(0)

View File

@ -406,6 +406,23 @@ def test_ngram_sizes(en_tokenizer):
assert_array_equal(OPS.to_numpy(ngrams_3.lengths), [0, 1, 3, 6, 9])
def test_preset_spans_suggester():
    """Check the candidates returned by ``spacy.preset_spans_suggester.v1``.

    Two docs are annotated with one and two spans under ``SPAN_KEY``; the
    suggester is expected to return a ``Ragged`` holding one row per
    annotated span, grouped per doc.
    """
    nlp = Language()
    docs = [nlp("This is an example."), nlp("This is the second example.")]
    # Pre-populate the span groups the suggester is configured to read from.
    docs[0].spans[SPAN_KEY] = [docs[0][3:4]]
    docs[1].spans[SPAN_KEY] = [docs[1][0:4], docs[1][3:5]]
    suggester = registry.misc.get("spacy.preset_spans_suggester.v1")(
        spans_key=SPAN_KEY
    )
    candidates = suggester(docs)
    # isinstance is the idiomatic type check and also accepts subclasses.
    assert isinstance(candidates, Ragged)
    # One entry per doc.
    assert len(candidates) == 2
    # Rows match the slice bounds of the spans assigned above, in doc order.
    assert list(candidates.dataXd[0]) == [3, 4]
    assert list(candidates.dataXd[1]) == [0, 4]
    assert list(candidates.dataXd[2]) == [3, 5]
    # First doc contributed one span, second doc two.
    assert list(candidates.lengths) == [1, 2]
def test_overfitting_IO():
# Simple test to try and quickly overfit the spancat component - ensuring the ML models work correctly
fix_random_seed(0)