mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 01:04:34 +03:00
32 lines
962 B
Python
32 lines
962 B
Python
'''Test Span.as_doc() doesn't segfault'''
|
|
from __future__ import unicode_literals
|
|
from ...tokens import Doc
|
|
from ...vocab import Vocab
|
|
from ... import load as load_spacy
|
|
|
|
|
|
def test_issue1537():
|
|
string = 'The sky is blue . The man is pink . The dog is purple .'
|
|
doc = Doc(Vocab(), words=string.split())
|
|
doc[0].sent_start = True
|
|
for word in doc[1:]:
|
|
if word.nbor(-1).text == '.':
|
|
word.sent_start = True
|
|
else:
|
|
word.sent_start = False
|
|
|
|
sents = list(doc.sents)
|
|
sent0 = sents[0].as_doc()
|
|
sent1 = sents[1].as_doc()
|
|
assert isinstance(sent0, Doc)
|
|
assert isinstance(sent1, Doc)
|
|
|
|
|
|
# Currently segfaulting, due to l_edge and r_edge misalignment
|
|
#def test_issue1537_model():
|
|
# nlp = load_spacy('en')
|
|
# doc = nlp(u'The sky is blue. The man is pink. The dog is purple.')
|
|
# sents = [s.as_doc() for s in doc.sents]
|
|
# print(list(sents[0].noun_chunks))
|
|
# print(list(sents[1].noun_chunks))
|