mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-06 05:10:21 +03:00
Add failing test for span-sentence mapping.
This commit is contained in:
parent
6f9d630f7e
commit
513b6370f4
|
@ -6,6 +6,7 @@ from spacy.attrs import ORTH, LENGTH
|
|||
from spacy.lang.en import English
|
||||
from spacy.tokens import Doc, Span, Token
|
||||
from spacy.vocab import Vocab
|
||||
from spacy import load
|
||||
from spacy.util import filter_spans
|
||||
from thinc.api import get_current_ops
|
||||
|
||||
|
@ -680,3 +681,41 @@ def test_span_group_copy(doc):
|
|||
assert len(doc.spans["test"]) == 3
|
||||
# check that the copy spans were not modified and this is an isolated doc
|
||||
assert len(doc_copy.spans["test"]) == 2
|
||||
|
||||
|
||||
@pytest.mark.xfail
@pytest.mark.parametrize("use_double_space", [False, True])
def test_span_sentence_mapping(use_double_space: bool):
    """Test that a span is mapped to the sentence that contains it.

    Marked as xfail: this is currently failing due to some issue with the
    span-to-sentence mapping.

    use_double_space (bool): Whether to use double space after end of first sentence.
    """
    nlp = load("en_core_web_sm")
    space = " " if use_double_space else ""
    raw_sents = [
        "Well, you're taking your eyes off the road,\" said a governmental affairs representative for Sprint. "
        + space,
        "New Jersey, New York, and the District of Columbia already ban holding a cell phone while driving; a "
        '"hands-free" cell phone is legal.',
    ]
    doc = nlp("".join(raw_sents))

    # Ensure sentence splitting works as expected before testing span-to-sentence mapping.
    # Note that the sentence splitting behavior is already different when using double spaces, which shouldn't be the
    # case.
    sents = list(doc.sents)
    assert len(sents) == 2
    # Compute the expected text up front. Written inline as
    # `assert a == b if cond else c`, the conditional expression binds looser
    # than `==`, so the single-space case would merely assert a non-empty
    # (always truthy) string and never compare the sentence text at all.
    expected_first_sent = raw_sents[0] if use_double_space else raw_sents[0][:-1]
    assert sents[0].text == expected_first_sent
    assert sents[1].text == raw_sents[1]

    # Select span for test.
    start = 100
    end = 111 if use_double_space else 110
    span = doc.char_span(start, end)
    # Fail with a clear message rather than an AttributeError on `.text`
    # should the character offsets not yield a span.
    assert span is not None, "char offsets did not yield a span"
    assert span.text == doc.text[start:end] == space + "New Jersey"

    # Test span-to-sentence mapping. Since the span in question doesn't cross sentence boundaries, there should only be
    # one sentence.
    span_sents = list(span.sents)
    span_sent = span.sent
    assert len(span_sents) == 1
    assert span_sent.text == sents[1].text
|
||||
|
|
Loading…
Reference in New Issue
Block a user