From 02cfda48c95b10acf59d06d6c769c04c60736b49 Mon Sep 17 00:00:00 2001
From: Ines Montani
Date: Thu, 5 Jan 2017 13:16:55 +0100
Subject: [PATCH] Modernize and merge tokenizer tests for string loading

---
 spacy/tests/tokenizer/test_string_loading.py | 9 ---------
 spacy/tests/tokenizer/test_tokenizer.py      | 7 +++++++
 2 files changed, 7 insertions(+), 9 deletions(-)
 delete mode 100644 spacy/tests/tokenizer/test_string_loading.py

diff --git a/spacy/tests/tokenizer/test_string_loading.py b/spacy/tests/tokenizer/test_string_loading.py
deleted file mode 100644
index 1bc5539bc..000000000
--- a/spacy/tests/tokenizer/test_string_loading.py
+++ /dev/null
@@ -1,9 +0,0 @@
-"""Test suspected freeing of strings"""
-from __future__ import unicode_literals
-
-
-def test_one(en_tokenizer):
-    tokens = en_tokenizer('Betty Botter bought a pound of butter.')
-    assert tokens[0].orth_ == 'Betty'
-    tokens2 = en_tokenizer('Betty also bought a pound of butter.')
-    assert tokens2[0].orth_ == 'Betty'
diff --git a/spacy/tests/tokenizer/test_tokenizer.py b/spacy/tests/tokenizer/test_tokenizer.py
index f41969b4f..3bdf9095c 100644
--- a/spacy/tests/tokenizer/test_tokenizer.py
+++ b/spacy/tests/tokenizer/test_tokenizer.py
@@ -149,3 +149,10 @@ def test_ie(en_tokenizer):
 # text = 'But then the 6,000-year ice age came...'
 # tokens = EN.tokenize(text)
 # assert len(tokens) == 10
+def test_tokenizer_suspected_freeing_strings(en_tokenizer):
+    text1 = "Betty Botter bought a pound of butter."
+    text2 = "Betty also bought a pound of butter."
+    tokens1 = en_tokenizer(text1)
+    tokens2 = en_tokenizer(text2)
+    assert tokens1[0].text == "Betty"
+    assert tokens2[0].text == "Betty"