Modernize and merge tokenizer tests for string loading

Ines Montani 2017-01-05 13:16:55 +01:00
parent a11f684822
commit 02cfda48c9
2 changed files with 7 additions and 9 deletions


@@ -1,9 +0,0 @@
"""Test suspected freeing of strings"""
from __future__ import unicode_literals


def test_one(en_tokenizer):
    tokens = en_tokenizer('Betty Botter bought a pound of butter.')
    assert tokens[0].orth_ == 'Betty'
    tokens2 = en_tokenizer('Betty also bought a pound of butter.')
    assert tokens2[0].orth_ == 'Betty'
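
Note: the removed test asserted on Token.orth_, while the merged test added below asserts on Token.text. For a token, both attributes return the verbatim token text, so the change is a modernization of style rather than behaviour. A minimal illustration (not part of the diff, assuming the same en_tokenizer fixture is available):

    doc = en_tokenizer("Betty Botter bought a pound of butter.")
    # Token.orth_ and Token.text both give the token's verbatim text
    assert doc[0].orth_ == doc[0].text == "Betty"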


@@ -149,3 +149,10 @@ def test_ie(en_tokenizer):
# text = 'But then the 6,000-year ice age came...'
# tokens = EN.tokenize(text)
# assert len(tokens) == 10
def test_tokenizer_suspected_freeing_strings(en_tokenizer):
    text1 = "Betty Botter bought a pound of butter."
    text2 = "Betty also bought a pound of butter."
    tokens1 = en_tokenizer(text1)
    tokens2 = en_tokenizer(text2)
    assert tokens1[0].text == "Betty"
    assert tokens2[0].text == "Betty"