Modernize and merge tokenizer tests for string loading

Ines Montani 2017-01-05 13:16:55 +01:00
parent a11f684822
commit 02cfda48c9
2 changed files with 7 additions and 9 deletions


@@ -1,9 +0,0 @@
"""Test suspected freeing of strings"""
from __future__ import unicode_literals


def test_one(en_tokenizer):
    tokens = en_tokenizer('Betty Botter bought a pound of butter.')
    assert tokens[0].orth_ == 'Betty'
    tokens2 = en_tokenizer('Betty also bought a pound of butter.')
    assert tokens2[0].orth_ == 'Betty'
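
Note: the removed test asserted on Token.orth_, while the merged test added below asserts on Token.text. For a token, both attributes return the verbatim token text, so the change is a modernization of style rather than behaviour. A minimal illustration (not part of the diff, assuming the same en_tokenizer fixture is available):

    doc = en_tokenizer("Betty Botter bought a pound of butter.")
    # Token.orth_ and Token.text both give the token's verbatim text
    assert doc[0].orth_ == doc[0].text == "Betty"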


@@ -149,3 +149,10 @@ def test_ie(en_tokenizer):
# text = 'But then the 6,000-year ice age came...'
# tokens = EN.tokenize(text)
# assert len(tokens) == 10
def test_tokenizer_suspected_freeing_strings(en_tokenizer):
    text1 = "Betty Botter bought a pound of butter."
    text2 = "Betty also bought a pound of butter."
    tokens1 = en_tokenizer(text1)
    tokens2 = en_tokenizer(text2)
    assert tokens1[0].text == "Betty"
    assert tokens2[0].text == "Betty"