From 52ef51f36e005a5d7033c21d08cfae17a561ebca Mon Sep 17 00:00:00 2001
From: Kit
Date: Thu, 25 Jan 2018 22:56:48 +0100
Subject: [PATCH 1/2] Add test for issue #1889

---
 spacy/tests/regression/test_issue1889.py | 11 +++++++++++
 1 file changed, 11 insertions(+)
 create mode 100644 spacy/tests/regression/test_issue1889.py

diff --git a/spacy/tests/regression/test_issue1889.py b/spacy/tests/regression/test_issue1889.py
new file mode 100644
index 000000000..a0e20abcf
--- /dev/null
+++ b/spacy/tests/regression/test_issue1889.py
@@ -0,0 +1,11 @@
+# coding: utf-8
+from __future__ import unicode_literals
+from ...lang.lex_attrs import is_stop
+from ...lang.en.stop_words import STOP_WORDS
+
+import pytest
+
+
+@pytest.mark.parametrize('word', ['the'])
+def test_lex_attrs_stop_words_case_sensitivity(word):
+    assert is_stop(word, STOP_WORDS) == is_stop(word.upper(), STOP_WORDS)

From 4b42267ba3e2c26098e258f36e5529535a9e7d4f Mon Sep 17 00:00:00 2001
From: Kit
Date: Thu, 25 Jan 2018 23:17:22 +0100
Subject: [PATCH 2/2] Fix issue #1889

---
 spacy/lang/lex_attrs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/lang/lex_attrs.py b/spacy/lang/lex_attrs.py
index c3bb4a8ff..d9b84e666 100644
--- a/spacy/lang/lex_attrs.py
+++ b/spacy/lang/lex_attrs.py
@@ -136,7 +136,7 @@ def is_lower(string): return string.islower()
 def is_space(string): return string.isspace()
 def is_title(string): return string.istitle()
 def is_upper(string): return string.isupper()
-def is_stop(string, stops=set()): return string in stops
+def is_stop(string, stops=set()): return string.lower() in stops
 def is_oov(string): return True
 def get_prob(string): return -20.