diff --git a/spacy/tests/tokenizer/test_tokenizer.py b/spacy/tests/tokenizer/test_tokenizer.py
index 92e610fe0..f41969b4f 100644
--- a/spacy/tests/tokenizer/test_tokenizer.py
+++ b/spacy/tests/tokenizer/test_tokenizer.py
@@ -143,11 +143,6 @@ def test_ie(en_tokenizer):
     assert tokens[3].orth_ == "i.e."
 
 
-def test_two_whitespace(en_tokenizer):
-    orig_str = u'there are 2 spaces after this  '
-    tokens = en_tokenizer(orig_str)
-    assert repr(tokens.text_with_ws) == repr(orig_str)
-
 
 
 #def test_cnts7():
diff --git a/spacy/tests/tokenizer/test_whitespace.py b/spacy/tests/tokenizer/test_whitespace.py
index 9dd3a19a1..90dc80615 100644
--- a/spacy/tests/tokenizer/test_whitespace.py
+++ b/spacy/tests/tokenizer/test_whitespace.py
@@ -20,6 +20,12 @@ def test_tokenizer_splits_double_space(en_tokenizer, text):
     assert tokens[1].text == " "
 
 
+@pytest.mark.parametrize('text', ["two spaces after this  "])
+def test_tokenizer_handles_double_trailing_ws(en_tokenizer, text):
+    tokens = en_tokenizer(text)
+    assert repr(tokens.text_with_ws) == repr(text)
+
+
@pytest.mark.parametrize('text', ["hello\npossums"])
 def test_tokenizer_splits_newline(en_tokenizer, text):
     tokens = en_tokenizer(text)
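
A minimal sketch of the invariant the moved test asserts, runnable outside the
test suite. It assumes a current spaCy install where the blank English pipeline
is importable from spacy.lang.en (the diff itself targets an older layout with
an en_tokenizer fixture). Comparing repr() of the two strings makes
trailing-whitespace mismatches visible in a failed assertion, where the raw
strings would look identical.

from spacy.lang.en import English

nlp = English()  # blank pipeline: tokenizer only, no models needed
text = "two spaces after this  "
doc = nlp(text)
# text_with_ws concatenates each token's text plus its trailing
# whitespace, so it must round-trip the input exactly, including
# both trailing spaces.
assert repr(doc.text_with_ws) == repr(text)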