mirror of
https://github.com/explosion/spaCy.git
synced 2025-07-19 04:32:32 +03:00
Add sentence map test
This commit is contained in:
parent
3ed0fae671
commit
a151c62d13
|
@ -6,7 +6,11 @@ from spacy.training import Example
|
||||||
from spacy.lang.en import English
|
from spacy.lang.en import English
|
||||||
from spacy.tests.util import make_tempdir
|
from spacy.tests.util import make_tempdir
|
||||||
from spacy.pipeline.coref import DEFAULT_CLUSTERS_PREFIX
|
from spacy.pipeline.coref import DEFAULT_CLUSTERS_PREFIX
|
||||||
from spacy.ml.models.coref_util import select_non_crossing_spans, get_candidate_mentions
|
from spacy.ml.models.coref_util import (
|
||||||
|
select_non_crossing_spans,
|
||||||
|
get_candidate_mentions,
|
||||||
|
get_sentence_map,
|
||||||
|
)
|
||||||
|
|
||||||
# fmt: off
|
# fmt: off
|
||||||
TRAIN_DATA = [
|
TRAIN_DATA = [
|
||||||
|
@ -35,6 +39,13 @@ def nlp():
|
||||||
return English()
|
return English()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def snlp():
|
||||||
|
en = English()
|
||||||
|
en.add_pipe("sentencizer")
|
||||||
|
return en
|
||||||
|
|
||||||
|
|
||||||
def test_add_pipe(nlp):
|
def test_add_pipe(nlp):
|
||||||
nlp.add_pipe("coref")
|
nlp.add_pipe("coref")
|
||||||
assert nlp.pipe_names == ["coref"]
|
assert nlp.pipe_names == ["coref"]
|
||||||
|
@ -158,17 +169,21 @@ def test_crossing_spans():
|
||||||
assert gold == guess
|
assert gold == guess
|
||||||
|
|
||||||
|
|
||||||
def test_mention_generator():
|
def test_mention_generator(snlp):
|
||||||
# don't use the fixture because we want the sentencizer
|
nlp = snlp
|
||||||
nlp = English()
|
|
||||||
doc = nlp("I like text.") # four tokens
|
doc = nlp("I like text.") # four tokens
|
||||||
max_width = 20
|
max_width = 20
|
||||||
mentions = get_candidate_mentions(doc, max_width)
|
mentions = get_candidate_mentions(doc, max_width)
|
||||||
assert len(mentions[0]) == 10
|
assert len(mentions[0]) == 10
|
||||||
|
|
||||||
# check multiple sentences
|
# check multiple sentences
|
||||||
nlp.add_pipe("sentencizer")
|
|
||||||
doc = nlp("I like text. This is text.") # eight tokens, two sents
|
doc = nlp("I like text. This is text.") # eight tokens, two sents
|
||||||
max_width = 20
|
max_width = 20
|
||||||
mentions = get_candidate_mentions(doc, max_width)
|
mentions = get_candidate_mentions(doc, max_width)
|
||||||
assert len(mentions[0]) == 20
|
assert len(mentions[0]) == 20
|
||||||
|
|
||||||
|
|
||||||
|
def test_sentence_map(snlp):
|
||||||
|
doc = snlp("I like text. This is text.")
|
||||||
|
sm = get_sentence_map(doc)
|
||||||
|
assert sm == [0, 0, 0, 0, 1, 1, 1, 1]
|
||||||
|
|
Loading…
Reference in New Issue
Block a user