add test for the issue

This commit is contained in:
Thomas Opsomer 2018-01-28 19:49:56 +01:00
parent 6a8cb905aa
commit 45d62561f7

View File

@@ -0,0 +1,27 @@
from __future__ import unicode_literals
from ...tokens import Doc
from ...vocab import Vocab
def test_issue1834():
    """Check that a serialization round-trip keeps sentence boundaries
    and the parsed/tagged flags (regression test named for issue 1834).

    A Doc is built from plain words, a sentence start is set by hand,
    and the Doc is pushed through ``to_bytes`` / ``from_bytes`` twice:
    once with ``is_parsed`` / ``is_tagged`` untouched and once after
    both flags have been set to True.
    """
    tokens = "This is a first sentence . And another one".split()
    original = Doc(Vocab(), words=tokens)
    # Both round-trips below deserialize into a Doc built on this vocab.
    shared_vocab = original.vocab

    # Token 6 is "And": mark it as the first token of a second sentence.
    original[6].sent_start = True

    # First round-trip: the boundary is kept, the flags stay falsy.
    restored = Doc(shared_vocab).from_bytes(original.to_bytes())
    assert restored[6].sent_start
    assert not restored.is_parsed
    assert not restored.is_tagged

    # Second round-trip: flags set on the source Doc must come back set.
    original.is_parsed = True
    original.is_tagged = True
    restored = Doc(shared_vocab).from_bytes(original.to_bytes())
    assert restored.is_parsed
    assert restored.is_tagged