addressed all comments by Ines

This commit is contained in:
svlandeg 2019-04-03 13:50:33 +02:00
parent 85b4319f33
commit 4ff786e113
3 changed files with 14 additions and 7 deletions

View File

@ -69,7 +69,10 @@ yet you your yours yourself yourselves
""".split() """.split()
) )
for hyphen in ["'", "`", "‘", "´", "’"]: contractions = ["n't", "'d", "'ll", "'m", "'re", "'s", "'ve"]
for stopword in u"n't 'd 'll 'm 're 's 've".split(): STOP_WORDS.update(contractions)
STOP_WORDS.add(stopword.replace("'", hyphen))
for apostrophe in ["‘", "’"]:
for stopword in contractions:
STOP_WORDS.add(stopword.replace("'", apostrophe))

View File

@ -1,4 +1,6 @@
# coding: utf8 # coding: utf8
from __future__ import unicode_literals
import pytest import pytest
from spacy.lang.en import English from spacy.lang.en import English

View File

@ -1,14 +1,16 @@
# coding: utf8 # coding: utf8
from __future__ import unicode_literals
import pytest import pytest
@pytest.mark.parametrize( @pytest.mark.parametrize(
"word", "word",
[ [
u"don't", "don't",
u"dont", "dont",
u"I'd", "I'd",
u"Id", "Id",
], ],
) )
def test_issue3521(en_tokenizer, word): def test_issue3521(en_tokenizer, word):