Add test for Issue #1537

This commit is contained in:
Matthew Honnibal 2018-01-15 15:20:56 +01:00
parent ee70fd2d75
commit 6b215d2dd3

View File

@@ -0,0 +1,30 @@
'''Test that Span.as_doc() doesn't segfault (Issue #1537).'''
from ...tokens import Doc
from ...vocab import Vocab
from ... import load as load_spacy
def test_issue1537():
    '''Regression test for Issue #1537: as_doc() on sentence spans.

    Builds a Doc from pre-split words, marks the sentence starts by hand,
    then calls as_doc() on the first two items of doc.sents and checks
    that each call returns a Doc.
    '''
    text = 'The sky is blue . The man is pink . The dog is purple .'
    doc = Doc(Vocab(), words=text.split())

    # Hand-annotate sentence starts: the first token always starts one,
    # and any later token starts one exactly when its nbor(-1) is '.'.
    doc[0].sent_start = True
    for token in doc[1:]:
        token.sent_start = token.nbor(-1).text == '.'

    sentences = list(doc.sents)
    # Convert both sentences before asserting, so that both as_doc()
    # calls are exercised even if the first result is not a Doc.
    first_doc = sentences[0].as_doc()
    second_doc = sentences[1].as_doc()
    assert isinstance(first_doc, Doc)
    assert isinstance(second_doc, Doc)
# Currently segfaulting, due to l_edge and r_edge misalignment
#def test_issue1537_model():
# nlp = load_spacy('en')
# doc = nlp(u'The sky is blue. The man is pink. The dog is purple.')
# sents = [s.as_doc() for s in doc.sents]
# print(list(sents[0].noun_chunks))
# print(list(sents[1].noun_chunks))