Modernize and merge tokenizer tests for whitespace

Ines Montani 2017-01-05 13:16:33 +01:00
parent 8b284fc6f1
commit a11f684822
2 changed files with 6 additions and 5 deletions

@@ -143,11 +143,6 @@ def test_ie(en_tokenizer):
     assert tokens[3].orth_ == "i.e."
 
-def test_two_whitespace(en_tokenizer):
-    orig_str = u'there are 2 spaces after this  '
-    tokens = en_tokenizer(orig_str)
-    assert repr(tokens.text_with_ws) == repr(orig_str)
-
 #def test_cnts7():
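A side note on the repr() comparison in the removed test: trailing whitespace is invisible in a plain string assertion, so the test compares repr() output, which quotes the strings. A minimal sketch of the idea, not part of the commit:

# Sketch: why the test compares repr() output. With raw strings, a
# missing trailing space is hard to see in pytest's failure message;
# repr() quotes the strings so the difference is easy to spot.
expected = u'there are 2 spaces after this  '  # two trailing spaces
actual = u'there are 2 spaces after this '     # one trailing space
assert repr(expected) != repr(actual)
# repr(expected) -> "'there are 2 spaces after this  '"
# repr(actual)   -> "'there are 2 spaces after this '"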

@@ -20,6 +20,12 @@ def test_tokenizer_splits_double_space(en_tokenizer, text):
     assert tokens[1].text == " "
 
+
+@pytest.mark.parametrize('text', ["two spaces after this  "])
+def test_tokenizer_handles_double_trailing_ws(en_tokenizer, text):
+    tokens = en_tokenizer(text)
+    assert repr(tokens.text_with_ws) == repr(text)
+
 @pytest.mark.parametrize('text', ["hello\npossums"])
 def test_tokenizer_splits_newline(en_tokenizer, text):
     tokens = en_tokenizer(text)
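
For reference, a self-contained version of the new parametrized test might look like the sketch below. Assumptions: in spaCy's test suite the en_tokenizer fixture comes from a shared conftest.py; here it is recreated with spacy.blank("en"), a newer API than this 2017 commit used, purely so the example runs on its own.

import pytest
import spacy


@pytest.fixture
def en_tokenizer():
    # Assumption: a blank English pipeline's tokenizer stands in for the
    # en_tokenizer fixture defined in the suite's conftest.py.
    return spacy.blank("en").tokenizer


@pytest.mark.parametrize('text', ["two spaces after this  "])
def test_tokenizer_handles_double_trailing_ws(en_tokenizer, text):
    # Calling the tokenizer directly returns a Doc; text_with_ws should
    # round-trip the input string, trailing whitespace included.
    tokens = en_tokenizer(text)
    assert repr(tokens.text_with_ws) == repr(text)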