From 673c81bbb4c832c645768302415108327f9c878a Mon Sep 17 00:00:00 2001 From: svlandeg Date: Tue, 2 Apr 2019 13:52:07 +0200 Subject: [PATCH] unicode string for python 2.7 --- spacy/lang/en/stop_words.py | 2 +- spacy/tests/regression/test_issue3521.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/spacy/lang/en/stop_words.py b/spacy/lang/en/stop_words.py index bdc36bdd7..07d4ff34c 100644 --- a/spacy/lang/en/stop_words.py +++ b/spacy/lang/en/stop_words.py @@ -70,6 +70,6 @@ yet you your yours yourself yourselves ) for hyphen in ["'", "`", "‘", "´", "’"]: - for stopword in "n't 'd 'll 'm 're 's 've".split(): + for stopword in u"n't 'd 'll 'm 're 's 've".split(): STOP_WORDS.add(stopword.replace("'", hyphen)) diff --git a/spacy/tests/regression/test_issue3521.py b/spacy/tests/regression/test_issue3521.py index 2a4568a17..1fe5e00d7 100644 --- a/spacy/tests/regression/test_issue3521.py +++ b/spacy/tests/regression/test_issue3521.py @@ -4,10 +4,10 @@ import pytest @pytest.mark.parametrize( "word", [ - "don't", - "don’t", - "I'd", - "I’d", + u"don't", + u"don’t", + u"I'd", + u"I’d", ], ) def test_issue3521(en_tokenizer, word):