Add multi-sentence mention test

Also formatting.
2025-11-14 06:45:54 +03:00 · 2021-07-19 13:00:16 +09:00 · 2021-07-19 13:00:16 +09:00 · 3ed0fae671
commit 3ed0fae671
parent 8bd0474730
1 changed file with 19 additions and 8 deletions
--- a/spacy/tests/pipeline/test_coref.py
+++ b/spacy/tests/pipeline/test_coref.py
@@ -145,19 +145,30 @@ def test_overfitting_IO(nlp):
    # assert_equal(batch_deps_1, batch_deps_2)
    # assert_equal(batch_deps_1, no_batch_deps)

-def test_crossing_spans():
-    starts = [ 6, 10, 0, 1, 0, 1, 0, 1, 2, 2, 2]
-    ends   = [12, 12, 2, 3, 3, 4, 4, 4, 3, 4, 5]
-    idxs   = list(range(len(starts)))
-    limit  = 5

-    gold = sorted([0 , 1, 2, 4, 6])
+def test_crossing_spans():
+    starts = [6, 10, 0, 1, 0, 1, 0, 1, 2, 2, 2]
+    ends = [12, 12, 2, 3, 3, 4, 4, 4, 3, 4, 5]
+    idxs = list(range(len(starts)))
+    limit = 5
+
+    gold = sorted([0, 1, 2, 4, 6])
    guess = select_non_crossing_spans(idxs, starts, ends, limit)
    guess = sorted(guess)
    assert gold == guess

-def test_mention_generator(nlp):
-    doc = nlp("I like text.") # four tokens
+
+def test_mention_generator():
+    # don't use the fixture because we want the sentencizer
+    nlp = English()
+    doc = nlp("I like text.")  # four tokens
    max_width = 20
    mentions = get_candidate_mentions(doc, max_width)
    assert len(mentions[0]) == 10
+
+    # check multiple sentences
+    nlp.add_pipe("sentencizer")
+    doc = nlp("I like text. This is text.")  # eight tokens, two sents
+    max_width = 20
+    mentions = get_candidate_mentions(doc, max_width)
+    assert len(mentions[0]) == 20