mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-27 18:36:36 +03:00
0ac3d27689
Fix off-by-one error that meant trailing spaces were being dropped. Closes #792
19 lines
657 B
Python
19 lines
657 B
Python
# coding: utf-8
|
|
from __future__ import unicode_literals
|
|
|
|
import pytest
|
|
|
|
|
|
@pytest.mark.parametrize('text', ["This is a string ", "This is a string\u0020"])
def test_issue792(en_tokenizer, text):
    """Regression test for Issue #792: trailing whitespace lost in tokenization.

    Tokenizes ``text`` and checks that concatenating every token's
    ``text_with_ws`` gives back the input exactly, final space included.
    """
    tokens = en_tokenizer(text)
    # Rebuild the input from the tokens, each with its trailing whitespace.
    rebuilt = ''.join(token.text_with_ws for token in tokens)
    assert rebuilt == text
|
|
|
|
|
|
@pytest.mark.parametrize('text', ["This is a string", "This is a string\n"])
def test_control_issue792(en_tokenizer, text):
    """Control case for Issue #792: inputs that do not end in a space.

    Covers a string with no trailing whitespace and one ending in a newline;
    joining every token's ``text_with_ws`` must give back the input exactly.
    """
    tokens = en_tokenizer(text)
    # Rebuild the input from the tokens, each with its trailing whitespace.
    rebuilt = ''.join(token.text_with_ws for token in tokens)
    assert rebuilt == text
|