Rename spans2ints > _spans_to_offsets

Paul O'Leary McCann 2022-07-04 19:28:35 +09:00
parent b09bbc7f5e
commit c7f333d593
3 changed files with 15 additions and 15 deletions

View File

@@ -205,7 +205,7 @@ def create_gold_scores(
     return out

-def spans2ints(doc):
+def _spans_to_offsets(doc):
     """Convert doc.spans to nested list of ints for comparison.
     The ints are character indices, and the spans groups are sorted by key first.
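
For orientation, here is a minimal sketch of what the renamed helper plausibly looks like, reconstructed from the docstring above. This is an assumption for illustration, not the verbatim code; the real implementation lives in spacy.ml.models.coref_util and may differ in detail.

    # Sketch only: reconstructed from the docstring, not the verbatim code.
    def _spans_to_offsets(doc):
        """Convert doc.spans to nested list of ints for comparison.
        The ints are character indices, and the span groups are sorted
        by key first, so group insertion order does not matter.
        """
        out = []
        for key in sorted(doc.spans.keys()):
            group = doc.spans[key]
            out.append([(span.start_char, span.end_char) for span in group])
        return out

On a doc with two span groups this would return something like [[(0, 5), (27, 30)], [(10, 18)]]: plain character offsets that compare cleanly across different Doc objects, which is exactly what the tests below need.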

View File

@@ -9,7 +9,7 @@ from spacy.ml.models.coref_util import (
     DEFAULT_CLUSTER_PREFIX,
     select_non_crossing_spans,
     get_sentence_ids,
-    spans2ints,
+    _spans_to_offsets,
 )
 from thinc.util import has_torch

@@ -101,7 +101,7 @@ def test_coref_serialization(nlp):
     assert nlp2.pipe_names == ["coref"]
     doc2 = nlp2(text)
-    assert spans2ints(doc) == spans2ints(doc2)
+    assert _spans_to_offsets(doc) == _spans_to_offsets(doc2)

 @pytest.mark.skipif(not has_torch, reason="Torch not available")

@@ -140,8 +140,8 @@ def test_overfitting_IO(nlp):
     docs1 = list(nlp.pipe(texts))
     docs2 = list(nlp.pipe(texts))
     docs3 = [nlp(text) for text in texts]
-    assert spans2ints(docs1[0]) == spans2ints(docs2[0])
-    assert spans2ints(docs1[0]) == spans2ints(docs3[0])
+    assert _spans_to_offsets(docs1[0]) == _spans_to_offsets(docs2[0])
+    assert _spans_to_offsets(docs1[0]) == _spans_to_offsets(docs3[0])

 @pytest.mark.skipif(not has_torch, reason="Torch not available")

@@ -196,8 +196,8 @@ def test_tokenization_mismatch(nlp):
     docs1 = list(nlp.pipe(texts))
     docs2 = list(nlp.pipe(texts))
     docs3 = [nlp(text) for text in texts]
-    assert spans2ints(docs1[0]) == spans2ints(docs2[0])
-    assert spans2ints(docs1[0]) == spans2ints(docs3[0])
+    assert _spans_to_offsets(docs1[0]) == _spans_to_offsets(docs2[0])
+    assert _spans_to_offsets(docs1[0]) == _spans_to_offsets(docs3[0])

 @pytest.mark.skipif(not has_torch, reason="Torch not available")
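
The serialization tests touched above all follow the same round-trip pattern: save the pipeline, reload it, and compare predicted clusters by character offsets rather than by Span objects. A condensed sketch of that pattern, with assumed names (the real tests build nlp from fixtures and also assert pipe_names):

    # Condensed round-trip check; assumes a pipeline whose component
    # factories are registered so spacy.load can reconstruct it.
    import spacy
    from spacy.util import make_tempdir
    from spacy.ml.models.coref_util import _spans_to_offsets

    def clusters_survive_roundtrip(nlp, text):
        doc = nlp(text)
        with make_tempdir() as tmp_dir:
            nlp.to_disk(tmp_dir)
            nlp2 = spacy.load(tmp_dir)
        doc2 = nlp2(text)
        # Comparing offset lists sidesteps Span comparison across Docs.
        return _spans_to_offsets(doc) == _spans_to_offsets(doc2)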

View File

@@ -9,7 +9,7 @@ from spacy.ml.models.coref_util import (
     DEFAULT_CLUSTER_PREFIX,
     select_non_crossing_spans,
     get_sentence_ids,
-    spans2ints,
+    _spans_to_offsets,
 )
 from thinc.util import has_torch

@@ -88,7 +88,7 @@ def test_span_predictor_serialization(nlp):
     assert nlp2.pipe_names == ["span_predictor"]
     doc2 = nlp2(text)
-    assert spans2ints(doc) == spans2ints(doc2)
+    assert _spans_to_offsets(doc) == _spans_to_offsets(doc2)

 @pytest.mark.skipif(not has_torch, reason="Torch not available")

@@ -122,7 +122,7 @@ def test_overfitting_IO(nlp):
     # test the trained model, using the pred since it has heads
     doc = nlp(train_examples[0].predicted)
     # XXX This actually tests that it can overfit
-    assert spans2ints(doc) == spans2ints(train_examples[0].reference)
+    assert _spans_to_offsets(doc) == _spans_to_offsets(train_examples[0].reference)

     # Also test the results are still the same after IO
     with make_tempdir() as tmp_dir:

@@ -140,8 +140,8 @@ def test_overfitting_IO(nlp):
     docs1 = list(nlp.pipe(texts))
     docs2 = list(nlp.pipe(texts))
     docs3 = [nlp(text) for text in texts]
-    assert spans2ints(docs1[0]) == spans2ints(docs2[0])
-    assert spans2ints(docs1[0]) == spans2ints(docs3[0])
+    assert _spans_to_offsets(docs1[0]) == _spans_to_offsets(docs2[0])
+    assert _spans_to_offsets(docs1[0]) == _spans_to_offsets(docs3[0])

 @pytest.mark.skipif(not has_torch, reason="Torch not available")

@@ -187,7 +187,7 @@ def test_tokenization_mismatch(nlp):
     test_doc = train_examples[0].predicted
     doc = nlp(test_doc)
     # XXX This actually tests that it can overfit
-    assert spans2ints(doc) == spans2ints(train_examples[0].reference)
+    assert _spans_to_offsets(doc) == _spans_to_offsets(train_examples[0].reference)

     # Also test the results are still the same after IO
     with make_tempdir() as tmp_dir:

@@ -206,6 +206,6 @@ def test_tokenization_mismatch(nlp):
     docs1 = list(nlp.pipe(texts))
     docs2 = list(nlp.pipe(texts))
     docs3 = [nlp(text) for text in texts]
-    assert spans2ints(docs1[0]) == spans2ints(docs2[0])
-    assert spans2ints(docs1[0]) == spans2ints(docs3[0])
+    assert _spans_to_offsets(docs1[0]) == _spans_to_offsets(docs2[0])
+    assert _spans_to_offsets(docs1[0]) == _spans_to_offsets(docs3[0])
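
As a usage note: the docs1/docs2/docs3 comparisons in the overfitting and tokenization-mismatch tests check that batched processing (nlp.pipe) and one-at-a-time calls (nlp(text)) predict identical clusters. Because the helper reduces each doc to nested lists of plain offsets, an ordinary == is enough for that check, and the rename to a leading underscore signals that it is a test/internal utility rather than public API.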