Modernize and merge tokenizer tests for string loading
This commit is contained in:
parent a11f684822
commit 02cfda48c9
@@ -1,9 +0,0 @@
-"""Test suspected freeing of strings"""
-from __future__ import unicode_literals
-
-
-def test_one(en_tokenizer):
-    tokens = en_tokenizer('Betty Botter bought a pound of butter.')
-    assert tokens[0].orth_ == 'Betty'
-    tokens2 = en_tokenizer('Betty also bought a pound of butter.')
-    assert tokens2[0].orth_ == 'Betty'
@@ -149,3 +149,10 @@ def test_ie(en_tokenizer):
 # text = 'But then the 6,000-year ice age came...'
 # tokens = EN.tokenize(text)
 # assert len(tokens) == 10
+def test_tokenizer_suspected_freeing_strings(en_tokenizer):
+    text1 = "Betty Botter bought a pound of butter."
+    text2 = "Betty also bought a pound of butter."
+    tokens1 = en_tokenizer(text1)
+    tokens2 = en_tokenizer(text2)
+    assert tokens1[0].text == "Betty"
+    assert tokens2[0].text == "Betty"
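Note: the merged test relies on an en_tokenizer pytest fixture that is defined elsewhere in the test suite and is not part of this diff. As a minimal sketch only, assuming a modern spaCy install and using illustrative names rather than the repository's actual conftest code, such a fixture could look like:

import pytest
import spacy


@pytest.fixture
def en_tokenizer():
    # Hypothetical minimal fixture (not spaCy's real conftest): a blank
    # English pipeline exposes the rule-based tokenizer, which can be
    # called directly on raw text to produce a Doc.
    return spacy.blank("en").tokenizer

With a fixture along these lines, en_tokenizer("Betty Botter bought a pound of butter.") returns a Doc whose first token has .text == "Betty", which is exactly what the merged test asserts for both input strings.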