mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-13 18:56:36 +03:00
add test for span.sent when doc not parsed
This commit is contained in:
parent
deab391cbf
commit
5d24a81c0b
|
@ -19,6 +19,15 @@ def doc(en_tokenizer):
|
|||
return get_doc(tokens.vocab, [t.text for t in tokens], heads=heads, deps=deps)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def doc_not_parsed(en_tokenizer):
|
||||
text = "This is a sentence. This is another sentence. And a third."
|
||||
tokens = en_tokenizer(text)
|
||||
d = get_doc(tokens.vocab, [t.text for t in tokens])
|
||||
d.is_parsed = False
|
||||
return d
|
||||
|
||||
|
||||
def test_spans_sent_spans(doc):
|
||||
sents = list(doc.sents)
|
||||
assert sents[0].start == 0
|
||||
|
@ -34,6 +43,7 @@ def test_spans_root(doc):
|
|||
assert span.root.text == 'sentence'
|
||||
assert span.root.head.text == 'is'
|
||||
|
||||
|
||||
def test_spans_string_fn(doc):
|
||||
span = doc[0:4]
|
||||
assert len(span) == 4
|
||||
|
@ -41,6 +51,7 @@ def test_spans_string_fn(doc):
|
|||
assert span.upper_ == 'THIS IS A SENTENCE'
|
||||
assert span.lower_ == 'this is a sentence'
|
||||
|
||||
|
||||
def test_spans_root2(en_tokenizer):
|
||||
text = "through North and South Carolina"
|
||||
heads = [0, 3, -1, -2, -4]
|
||||
|
@ -49,12 +60,17 @@ def test_spans_root2(en_tokenizer):
|
|||
assert doc[-2:].root.text == 'Carolina'
|
||||
|
||||
|
||||
def test_spans_span_sent(doc):
|
||||
def test_spans_span_sent(doc, doc_not_parsed):
|
||||
"""Test span.sent property"""
|
||||
assert len(list(doc.sents))
|
||||
assert doc[:2].sent.root.text == 'is'
|
||||
assert doc[:2].sent.text == 'This is a sentence .'
|
||||
assert doc[6:7].sent.root.left_edge.text == 'This'
|
||||
# test on manual sbd
|
||||
doc_not_parsed[0].is_sent_start = True
|
||||
doc_not_parsed[5].is_sent_start = True
|
||||
assert doc_not_parsed[1:3].sent == doc_not_parsed[0:5]
|
||||
assert doc_not_parsed[10:14].sent == doc_not_parsed[5:]
|
||||
|
||||
|
||||
def test_spans_lca_matrix(en_tokenizer):
|
||||
|
|
Loading…
Reference in New Issue
Block a user