diff --git a/spacy/lang/en/stop_words.py b/spacy/lang/en/stop_words.py
index bdc36bdd7..07d4ff34c 100644
--- a/spacy/lang/en/stop_words.py
+++ b/spacy/lang/en/stop_words.py
@@ -70,6 +70,6 @@ yet you your yours yourself yourselves
 )
 
 
 for hyphen in ["'", "`", "‘", "´", "’"]:
-    for stopword in "n't 'd 'll 'm 're 's 've".split():
+    for stopword in u"n't 'd 'll 'm 're 's 've".split():
         STOP_WORDS.add(stopword.replace("'", hyphen))
diff --git a/spacy/tests/regression/test_issue3521.py b/spacy/tests/regression/test_issue3521.py
index 2a4568a17..1fe5e00d7 100644
--- a/spacy/tests/regression/test_issue3521.py
+++ b/spacy/tests/regression/test_issue3521.py
@@ -4,10 +4,10 @@ import pytest
 @pytest.mark.parametrize(
     "word",
     [
-        "don't",
-        "don’t",
-        "I'd",
-        "I’d",
+        u"don't",
+        u"don’t",
+        u"I'd",
+        u"I’d",
     ],
 )
 def test_issue3521(en_tokenizer, word):