diff --git a/spacy/he/__init__.py b/spacy/he/__init__.py index 9426247bd..839d174a0 100644 --- a/spacy/he/__init__.py +++ b/spacy/he/__init__.py @@ -1,10 +1,12 @@ -# encoding: utf8 -from __future__ import unicode_literals, print_function +# coding: utf8 +from __future__ import unicode_literals +from .stop_words import STOP_WORDS + +from ..language_data import BASE_EXCEPTIONS from ..language import Language from ..attrs import LANG - -from .language_data import * +from ..util import update_exc class Hebrew(Language): @@ -14,8 +16,8 @@ class Hebrew(Language): lex_attr_getters = dict(Language.Defaults.lex_attr_getters) lex_attr_getters[LANG] = lambda text: 'he' - tokenizer_exceptions = TOKENIZER_EXCEPTIONS - stop_words = STOP_WORDS + tokenizer_exceptions = update_exc(BASE_EXCEPTIONS) + stop_words = set(STOP_WORDS) -EXPORT = Hebrew \ No newline at end of file +__all__ = ['Hebrew'] diff --git a/spacy/he/stop_words.py b/spacy/he/stop_words.py index 2914fa0d5..329c8847a 100644 --- a/spacy/he/stop_words.py +++ b/spacy/he/stop_words.py @@ -1,6 +1,7 @@ # encoding: utf8 from __future__ import unicode_literals + STOP_WORDS = set(""" אני את