unicode string for python 2.7

This commit is contained in:
svlandeg 2019-04-02 13:52:07 +02:00
parent eca9cc5417
commit 673c81bbb4
2 changed files with 5 additions and 5 deletions

View File

@ -70,6 +70,6 @@ yet you your yours yourself yourselves
)
for hyphen in ["'", "`", "‘", "´", "’"]:
for stopword in "n't 'd 'll 'm 're 's 've".split():
for stopword in u"n't 'd 'll 'm 're 's 've".split():
STOP_WORDS.add(stopword.replace("'", hyphen))

View File

@ -4,10 +4,10 @@ import pytest
@pytest.mark.parametrize(
"word",
[
"don't",
"don’t",
"I'd",
"I’d",
u"don't",
u"don’t",
u"I'd",
u"I’d",
],
)
def test_issue3521(en_tokenizer, word):