Mirror of https://github.com/explosion/spaCy.git (synced 2024-12-26 18:06:29 +03:00)
Modernize and merge tokenizer tests for whitespace
This commit is contained in:
parent 8b284fc6f1
commit a11f684822
@@ -143,11 +143,6 @@ def test_ie(en_tokenizer):
     assert tokens[3].orth_ == "i.e."
 
 
-def test_two_whitespace(en_tokenizer):
-    orig_str = u'there are 2 spaces after this  '
-    tokens = en_tokenizer(orig_str)
-    assert repr(tokens.text_with_ws) == repr(orig_str)
-
 
 
 #def test_cnts7():
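Both hunks call an en_tokenizer pytest fixture that is defined elsewhere in the test suite and does not appear in this diff. As a rough sketch only, assuming the fixture simply builds a bare English tokenizer on a modern spaCy install (the real conftest.py may construct it differently):

import pytest
from spacy.lang.en import English

@pytest.fixture
def en_tokenizer():
    # Assumed shape of the fixture: a plain English tokenizer with no
    # pipeline components, matching how the tests call en_tokenizer(text).
    return English().tokenizer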
@@ -20,6 +20,12 @@ def test_tokenizer_splits_double_space(en_tokenizer, text):
     assert tokens[1].text == " "
 
 
+@pytest.mark.parametrize('text', ["two spaces after this  "])
+def test_tokenizer_handles_double_trainling_ws(en_tokenizer, text):
+    tokens = en_tokenizer(text)
+    assert repr(tokens.text_with_ws) == repr(text)
+
+
 @pytest.mark.parametrize('text', ["hello\npossums"])
 def test_tokenizer_splits_newline(en_tokenizer, text):
     tokens = en_tokenizer(text)
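The merged test asserts a round-trip property: Doc.text_with_ws concatenates each token's text together with its trailing whitespace, so it must reproduce the input string exactly, trailing spaces included. A minimal standalone version of the same check, assuming a modern spaCy install where spacy.blank("en") stands in for the en_tokenizer fixture:

import spacy

# Tokenizer-only pipeline; no tagger, parser, or other components load.
nlp = spacy.blank("en")

text = "two spaces after this  "
doc = nlp(text)

# Doc.text_with_ws joins token.text_with_ws over all tokens, so the
# original string, including the double trailing space, survives intact.
assert doc.text_with_ws == text

The repr() wrapping in the committed test only serves to make trailing whitespace visible in pytest's failure output; the comparison itself is plain string equality.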