Modernize and merge tokenizer tests for whitespace

Ines Montani 2017-01-05 13:16:33 +01:00
parent 8b284fc6f1
commit a11f684822
2 changed files with 6 additions and 5 deletions

@@ -143,11 +143,6 @@ def test_ie(en_tokenizer):
     assert tokens[3].orth_ == "i.e."
 
-def test_two_whitespace(en_tokenizer):
-    orig_str = u'there are 2 spaces after this  '
-    tokens = en_tokenizer(orig_str)
-    assert repr(tokens.text_with_ws) == repr(orig_str)
-
 #def test_cnts7():
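A side note on the repr() comparison in the removed test: trailing whitespace is invisible in a plain string assertion, so the test compares repr() output, which quotes the strings. A minimal sketch of the idea, not part of the commit:

# Sketch: why the test compares repr() output. With raw strings, a
# missing trailing space is hard to see in pytest's failure message;
# repr() quotes the strings so the difference is easy to spot.
expected = u'there are 2 spaces after this  '  # two trailing spaces
actual = u'there are 2 spaces after this '     # one trailing space
assert repr(expected) != repr(actual)
# repr(expected) -> "'there are 2 spaces after this  '"
# repr(actual)   -> "'there are 2 spaces after this '"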

@@ -20,6 +20,12 @@ def test_tokenizer_splits_double_space(en_tokenizer, text):
     assert tokens[1].text == " "
 
+
+@pytest.mark.parametrize('text', ["two spaces after this  "])
+def test_tokenizer_handles_double_trailing_ws(en_tokenizer, text):
+    tokens = en_tokenizer(text)
+    assert repr(tokens.text_with_ws) == repr(text)
+
 @pytest.mark.parametrize('text', ["hello\npossums"])
 def test_tokenizer_splits_newline(en_tokenizer, text):
     tokens = en_tokenizer(text)
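
For reference, a self-contained version of the new parametrized test might look like the sketch below. Assumptions: in spaCy's test suite the en_tokenizer fixture comes from a shared conftest.py; here it is recreated with spacy.blank("en"), a newer API than this 2017 commit used, purely so the example runs on its own.

import pytest
import spacy


@pytest.fixture
def en_tokenizer():
    # Assumption: a blank English pipeline's tokenizer stands in for the
    # en_tokenizer fixture defined in the suite's conftest.py.
    return spacy.blank("en").tokenizer


@pytest.mark.parametrize('text', ["two spaces after this  "])
def test_tokenizer_handles_double_trailing_ws(en_tokenizer, text):
    # Calling the tokenizer directly returns a Doc; text_with_ws should
    # round-trip the input string, trailing whitespace included.
    tokens = en_tokenizer(text)
    assert repr(tokens.text_with_ws) == repr(text)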