unicode string for python 2.7

This commit is contained in:
svlandeg 2019-04-02 13:52:07 +02:00
parent eca9cc5417
commit 673c81bbb4
2 changed files with 5 additions and 5 deletions

View File

@@ -70,6 +70,6 @@ yet you your yours yourself yourselves
) )
for hyphen in ["'", "`", "‘", "´", "’"]: for hyphen in ["'", "`", "‘", "´", "’"]:
for stopword in "n't 'd 'll 'm 're 's 've".split(): for stopword in u"n't 'd 'll 'm 're 's 've".split():
STOP_WORDS.add(stopword.replace("'", hyphen)) STOP_WORDS.add(stopword.replace("'", hyphen))

View File

@@ -4,10 +4,10 @@ import pytest
@pytest.mark.parametrize( @pytest.mark.parametrize(
"word", "word",
[ [
"don't", u"don't",
"don’t", u"don’t",
"I'd", u"I'd",
"I’d", u"I’d",
], ],
) )
def test_issue3521(en_tokenizer, word): def test_issue3521(en_tokenizer, word):