# coding: utf8
from __future__ import unicode_literals

import pytest
from spacy.lang.en import English
from spacy.tokens import Doc


# NOTE(review): marked xfail, presumably because the serialization round trip
# did not yet preserve these boundaries when the test was added -- confirm and
# drop the marker once the test passes.
@pytest.mark.xfail
def test_issue3468():
    """Test that sentence boundaries are serialized if they're not set by the
    dependency parser.

    The pipeline contains only the "sentencizer" component. The resulting doc
    is round-tripped through ``Doc.to_bytes`` / ``Doc.from_bytes`` and the same
    sentence-boundary checks are repeated on the restored doc.
    """
    nlp = English()
    nlp.add_pipe(nlp.create_pipe("sentencizer"))
    doc = nlp("Hello world")
    # Sanity check: the boundaries exist before serialization.
    assert doc.is_sentenced
    assert doc[0].is_sent_start
    assert len(list(doc.sents)) == 1
    doc_bytes = doc.to_bytes()
    new_doc = Doc(nlp.vocab).from_bytes(doc_bytes)
    # All post-round-trip checks must inspect the restored doc; asserting on
    # the original ``doc`` here would pass regardless of what was serialized.
    assert new_doc.is_sentenced
    assert new_doc[0].is_sent_start
    assert len(list(new_doc.sents)) == 1