Add sentence map test

2025-09-19 10:32:40 +03:00 · 2021-07-19 13:05:26 +09:00 · 2021-07-19 13:05:26 +09:00 · a151c62d13
commit a151c62d13
parent 3ed0fae671
1 changed files with 20 additions and 5 deletions
--- a/spacy/tests/pipeline/test_coref.py
+++ b/spacy/tests/pipeline/test_coref.py
@ -6,7 +6,11 @@ from spacy.training import Example
 from spacy.lang.en import English
 from spacy.tests.util import make_tempdir
 from spacy.pipeline.coref import DEFAULT_CLUSTERS_PREFIX
-from spacy.ml.models.coref_util import select_non_crossing_spans, get_candidate_mentions
+from spacy.ml.models.coref_util import (
    select_non_crossing_spans,
    get_candidate_mentions,
    get_sentence_map,
 )
 # fmt: off
 TRAIN_DATA = [
@ -35,6 +39,13 @@ def nlp():
    return English()
@pytest.fixture
 def snlp():
    en = English()
    en.add_pipe("sentencizer")
    return en
 def test_add_pipe(nlp):
    nlp.add_pipe("coref")
    assert nlp.pipe_names == ["coref"]
@ -158,17 +169,21 @@ def test_crossing_spans():
    assert gold == guess
-def test_mention_generator():
+def test_mention_generator(snlp):
-    # don't use the fixture because we want the sentencizer
+    nlp = snlp
    nlp = English()
    doc = nlp("I like text.")  # four tokens
    max_width = 20
    mentions = get_candidate_mentions(doc, max_width)
    assert len(mentions[0]) == 10
    # check multiple sentences
    nlp.add_pipe("sentencizer")
    doc = nlp("I like text. This is text.")  # eight tokens, two sents
    max_width = 20
    mentions = get_candidate_mentions(doc, max_width)
    assert len(mentions[0]) == 20
 def test_sentence_map(snlp):
    doc = snlp("I like text. This is text.")
    sm = get_sentence_map(doc)
    assert sm == [0, 0, 0, 0, 1, 1, 1, 1]