Add Token.is_sent_start property so that Token.sent_start can be deprecated

This commit is contained in:
Matthew Honnibal 2017-11-01 13:27:14 +01:00
parent 7e7116cdf7
commit 9e0ebee81c
2 changed files with 28 additions and 8 deletions

View File

@ -155,13 +155,13 @@ def test_doc_token_api_head_setter(en_tokenizer):
assert doc[2].left_edge.i == 0
def test_sent_start(en_tokenizer):
def test_is_sent_start(en_tokenizer):
doc = en_tokenizer(u'This is a sentence. This is another.')
assert not doc[0].sent_start
assert not doc[5].sent_start
doc[5].sent_start = True
assert doc[5].sent_start
assert not doc[0].sent_start
assert doc[5].is_sent_start is None
doc[5].is_sent_start = True
assert doc[5].is_sent_start is True
# Backwards compatibility
assert doc[0].sent_start is False
doc.is_parsed = True
assert len(list(doc.sents)) == 2

View File

@ -330,9 +330,29 @@ cdef class Token:
return self.c.r_kids
property sent_start:
    """Deprecated accessor for the sentence-start flag; prefer
    `is_sent_start`.

    Reading returns False for the first token of the document (the value
    `doc[0].sent_start` historically had) and the raw stored
    `self.c.sent_start` value (an integer flag, not a bool) for every
    other token. Writing forwards the value to `is_sent_start`.
    """
    # TODO deprecation warning
    def __get__(self):
        # Handle broken backwards compatibility case: doc[0].sent_start
        # was False.
        if self.i == 0:
            return False
        # Read the underlying field directly. Going through
        # `self.sent_start` here would re-enter this getter and recurse
        # until the interpreter's recursion limit is hit.
        return self.c.sent_start

    def __set__(self, value):
        # Delegate so the deprecated and the new attribute share one setter.
        self.is_sent_start = value
property is_sent_start:
"""RETURNS (bool / None): Whether the token starts a sentence.
None if unknown.
"""
def __get__(self):
    # Map the stored flag onto three states: zero means unknown (None),
    # a negative value means "not a sentence start" (False), and any
    # positive value means "sentence start" (True).
    flag = self.c.sent_start
    if flag == 0:
        return None
    if flag < 0:
        return False
    return True
def __set__(self, value):
if self.doc.is_parsed: