diff --git a/spacy/lang/lex_attrs.py b/spacy/lang/lex_attrs.py
index f1279f035..009b6c3c3 100644
--- a/spacy/lang/lex_attrs.py
+++ b/spacy/lang/lex_attrs.py
@@ -144,7 +144,7 @@ def is_lower(string): return string.islower()
 def is_space(string): return string.isspace()
 def is_title(string): return string.istitle()
 def is_upper(string): return string.isupper()
-def is_stop(string, stops=set()): return string in stops
+def is_stop(string, stops=set()): return string.lower() in stops
 def is_oov(string): return True
 def get_prob(string): return -20.
 
diff --git a/spacy/tests/regression/test_issue1889.py b/spacy/tests/regression/test_issue1889.py
new file mode 100644
index 000000000..a0e20abcf
--- /dev/null
+++ b/spacy/tests/regression/test_issue1889.py
@@ -0,0 +1,11 @@
+# coding: utf-8
+from __future__ import unicode_literals
+from ...lang.lex_attrs import is_stop
+from ...lang.en.stop_words import STOP_WORDS
+
+import pytest
+
+
+@pytest.mark.parametrize('word', ['the'])
+def test_lex_attrs_stop_words_case_sensitivity(word):
+    assert is_stop(word, STOP_WORDS) == is_stop(word.upper(), STOP_WORDS)