Rename spans2ints -> _spans_to_offsets

This commit is contained in:
Paul O'Leary McCann 2022-07-04 19:28:35 +09:00
parent b09bbc7f5e
commit c7f333d593
3 changed files with 15 additions and 15 deletions

View File

@@ -205,7 +205,7 @@ def create_gold_scores(
return out
def spans2ints(doc):
def _spans_to_offsets(doc):
"""Convert doc.spans to nested list of ints for comparison.
The ints are character indices, and the spans groups are sorted by key first.

View File

@@ -9,7 +9,7 @@ from spacy.ml.models.coref_util import (
DEFAULT_CLUSTER_PREFIX,
select_non_crossing_spans,
get_sentence_ids,
spans2ints,
_spans_to_offsets,
)
from thinc.util import has_torch
@@ -101,7 +101,7 @@ def test_coref_serialization(nlp):
assert nlp2.pipe_names == ["coref"]
doc2 = nlp2(text)
assert spans2ints(doc) == spans2ints(doc2)
assert _spans_to_offsets(doc) == _spans_to_offsets(doc2)
@pytest.mark.skipif(not has_torch, reason="Torch not available")
@@ -140,8 +140,8 @@ def test_overfitting_IO(nlp):
docs1 = list(nlp.pipe(texts))
docs2 = list(nlp.pipe(texts))
docs3 = [nlp(text) for text in texts]
assert spans2ints(docs1[0]) == spans2ints(docs2[0])
assert spans2ints(docs1[0]) == spans2ints(docs3[0])
assert _spans_to_offsets(docs1[0]) == _spans_to_offsets(docs2[0])
assert _spans_to_offsets(docs1[0]) == _spans_to_offsets(docs3[0])
@pytest.mark.skipif(not has_torch, reason="Torch not available")
@@ -196,8 +196,8 @@ def test_tokenization_mismatch(nlp):
docs1 = list(nlp.pipe(texts))
docs2 = list(nlp.pipe(texts))
docs3 = [nlp(text) for text in texts]
assert spans2ints(docs1[0]) == spans2ints(docs2[0])
assert spans2ints(docs1[0]) == spans2ints(docs3[0])
assert _spans_to_offsets(docs1[0]) == _spans_to_offsets(docs2[0])
assert _spans_to_offsets(docs1[0]) == _spans_to_offsets(docs3[0])
@pytest.mark.skipif(not has_torch, reason="Torch not available")

View File

@@ -9,7 +9,7 @@ from spacy.ml.models.coref_util import (
DEFAULT_CLUSTER_PREFIX,
select_non_crossing_spans,
get_sentence_ids,
spans2ints,
_spans_to_offsets,
)
from thinc.util import has_torch
@@ -88,7 +88,7 @@ def test_span_predictor_serialization(nlp):
assert nlp2.pipe_names == ["span_predictor"]
doc2 = nlp2(text)
assert spans2ints(doc) == spans2ints(doc2)
assert _spans_to_offsets(doc) == _spans_to_offsets(doc2)
@pytest.mark.skipif(not has_torch, reason="Torch not available")
@@ -122,7 +122,7 @@ def test_overfitting_IO(nlp):
# test the trained model, using the pred since it has heads
doc = nlp(train_examples[0].predicted)
# XXX This actually tests that it can overfit
assert spans2ints(doc) == spans2ints(train_examples[0].reference)
assert _spans_to_offsets(doc) == _spans_to_offsets(train_examples[0].reference)
# Also test the results are still the same after IO
with make_tempdir() as tmp_dir:
@@ -140,8 +140,8 @@ def test_overfitting_IO(nlp):
docs1 = list(nlp.pipe(texts))
docs2 = list(nlp.pipe(texts))
docs3 = [nlp(text) for text in texts]
assert spans2ints(docs1[0]) == spans2ints(docs2[0])
assert spans2ints(docs1[0]) == spans2ints(docs3[0])
assert _spans_to_offsets(docs1[0]) == _spans_to_offsets(docs2[0])
assert _spans_to_offsets(docs1[0]) == _spans_to_offsets(docs3[0])
@pytest.mark.skipif(not has_torch, reason="Torch not available")
@@ -187,7 +187,7 @@ def test_tokenization_mismatch(nlp):
test_doc = train_examples[0].predicted
doc = nlp(test_doc)
# XXX This actually tests that it can overfit
assert spans2ints(doc) == spans2ints(train_examples[0].reference)
assert _spans_to_offsets(doc) == _spans_to_offsets(train_examples[0].reference)
# Also test the results are still the same after IO
with make_tempdir() as tmp_dir:
@@ -206,6 +206,6 @@ def test_tokenization_mismatch(nlp):
docs1 = list(nlp.pipe(texts))
docs2 = list(nlp.pipe(texts))
docs3 = [nlp(text) for text in texts]
assert spans2ints(docs1[0]) == spans2ints(docs2[0])
assert spans2ints(docs1[0]) == spans2ints(docs3[0])
assert _spans_to_offsets(docs1[0]) == _spans_to_offsets(docs2[0])
assert _spans_to_offsets(docs1[0]) == _spans_to_offsets(docs3[0])