mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-14 05:37:03 +03:00
Add sent property to token (#2521)
* Add sent property to token
* Refactored and cleaned up copy-paste errors.
This commit is contained in:
parent
a82c3153ad
commit
c21efea9bb
|
@ -56,6 +56,14 @@ def test_doc_token_api_str_builtin(en_tokenizer, text):
|
||||||
assert str(tokens[0]) == text.split(' ')[0]
|
assert str(tokens[0]) == text.split(' ')[0]
|
||||||
assert str(tokens[1]) == text.split(' ')[1]
|
assert str(tokens[1]) == text.split(' ')[1]
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def doc(en_tokenizer):
|
||||||
|
text = "This is a sentence. This is another sentence. And a third."
|
||||||
|
heads = [1, 0, 1, -2, -3, 1, 0, 1, -2, -3, 0, 1, -2, -1]
|
||||||
|
deps = ['nsubj', 'ROOT', 'det', 'attr', 'punct', 'nsubj', 'ROOT', 'det',
|
||||||
|
'attr', 'punct', 'ROOT', 'det', 'npadvmod', 'punct']
|
||||||
|
tokens = en_tokenizer(text)
|
||||||
|
return get_doc(tokens.vocab, [t.text for t in tokens], heads=heads, deps=deps)
|
||||||
|
|
||||||
def test_doc_token_api_is_properties(en_vocab):
|
def test_doc_token_api_is_properties(en_vocab):
|
||||||
text = ["Hi", ",", "my", "email", "is", "test@me.com"]
|
text = ["Hi", ",", "my", "email", "is", "test@me.com"]
|
||||||
|
@ -162,3 +170,11 @@ def test_is_sent_start(en_tokenizer):
|
||||||
assert doc[5].is_sent_start is True
|
assert doc[5].is_sent_start is True
|
||||||
doc.is_parsed = True
|
doc.is_parsed = True
|
||||||
assert len(list(doc.sents)) == 2
|
assert len(list(doc.sents)) == 2
|
||||||
|
|
||||||
|
def test_tokens_sent(doc):
|
||||||
|
"""Test token.sent property"""
|
||||||
|
assert len(list(doc.sents)) == 3
|
||||||
|
assert doc[1].sent.text == 'This is a sentence .'
|
||||||
|
assert doc[7].sent.text == 'This is another sentence .'
|
||||||
|
assert doc[1].sent.root.left_edge.text == 'This'
|
||||||
|
assert doc[7].sent.root.left_edge.text == 'This'
|
||||||
|
|
|
@ -356,6 +356,13 @@ cdef class Token:
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
return self.c.r_kids
|
return self.c.r_kids
|
||||||
|
|
||||||
|
property sent:
|
||||||
|
"""RETURNS (Span): The sentence span that the token is a part of."""
|
||||||
|
def __get__(self):
|
||||||
|
if 'sent' in self.doc.user_token_hooks:
|
||||||
|
return self.doc.user_token_hooks['sent'](self)
|
||||||
|
return self.doc[self.i : self.i+1].sent
|
||||||
|
|
||||||
property sent_start:
|
property sent_start:
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
# Raising a deprecation warning here causes errors for autocomplete
|
# Raising a deprecation warning here causes errors for autocomplete
|
||||||
|
|
Loading…
Reference in New Issue
Block a user