Add multi-sentence mention test

Also includes formatting changes.
This commit is contained in:
Paul O'Leary McCann 2021-07-19 13:00:16 +09:00
parent 8bd0474730
commit 3ed0fae671

View File

@@ -145,6 +145,7 @@ def test_overfitting_IO(nlp):
# assert_equal(batch_deps_1, batch_deps_2) # assert_equal(batch_deps_1, batch_deps_2)
# assert_equal(batch_deps_1, no_batch_deps) # assert_equal(batch_deps_1, no_batch_deps)
def test_crossing_spans(): def test_crossing_spans():
starts = [6, 10, 0, 1, 0, 1, 0, 1, 2, 2, 2] starts = [6, 10, 0, 1, 0, 1, 0, 1, 2, 2, 2]
ends = [12, 12, 2, 3, 3, 4, 4, 4, 3, 4, 5] ends = [12, 12, 2, 3, 3, 4, 4, 4, 3, 4, 5]
@@ -156,8 +157,18 @@ def test_crossing_spans():
guess = sorted(guess) guess = sorted(guess)
assert gold == guess assert gold == guess
def test_mention_generator(nlp):
def test_mention_generator():
# don't use the fixture because we want the sentencizer
nlp = English()
doc = nlp("I like text.") # four tokens doc = nlp("I like text.") # four tokens
max_width = 20 max_width = 20
mentions = get_candidate_mentions(doc, max_width) mentions = get_candidate_mentions(doc, max_width)
assert len(mentions[0]) == 10 assert len(mentions[0]) == 10
# check multiple sentences
nlp.add_pipe("sentencizer")
doc = nlp("I like text. This is text.") # eight tokens, two sents
max_width = 20
mentions = get_candidate_mentions(doc, max_width)
assert len(mentions[0]) == 20