Added regression test for Issue #792.

This commit is contained in:
latkins 2017-01-31 13:47:42 +00:00
parent 6c665b81df
commit e4c84321a5

View File

@@ -0,0 +1,13 @@
# coding: utf-8
from __future__ import unicode_literals
def test_issue792(en_tokenizer):
    """Regression test for Issue #792: trailing whitespace must be preserved.

    Tokenizes a string that ends in a single space and checks that
    ``text_with_ws`` on the result reproduces the input exactly, i.e. the
    trailing space is not dropped.

    Args:
        en_tokenizer: Callable supplied by the test suite that takes a text
            and returns an object exposing ``text_with_ws`` (presumably the
            English tokenizer fixture; defined outside this file).
    """
    text = "This is a string "
    doc = en_tokenizer(text)
    assert doc.text_with_ws == text

    # Same input, with the trailing space spelled as an escape sequence.
    # The module-level ``unicode_literals`` import makes this a unicode
    # literal, so the escape is interpreted on Python 2 as well.
    text_unicode = "This is a string\u0020"
    doc_unicode = en_tokenizer(text_unicode)
    assert doc_unicode.text_with_ws == text_unicode