diff --git a/spacy/lang/ko/__init__.py b/spacy/lang/ko/__init__.py
index d2af9c4b1..6197ab927 100644
--- a/spacy/lang/ko/__init__.py
+++ b/spacy/lang/ko/__init__.py
@@ -3,6 +3,7 @@ from thinc.api import Config
 
 from .stop_words import STOP_WORDS
 from .tag_map import TAG_MAP
+from .lex_attrs import LEX_ATTRS
 from ...language import Language
 from ...tokens import Doc
 from ...compat import copy_reg
@@ -64,6 +65,7 @@ class KoreanTokenizer(DummyTokenizer):
 
 class KoreanDefaults(Language.Defaults):
     config = Config().from_str(DEFAULT_CONFIG)
+    lex_attr_getters = LEX_ATTRS
     stop_words = STOP_WORDS
     writing_system = {"direction": "ltr", "has_case": False, "has_letters": False}
 
diff --git a/spacy/lang/ml/__init__.py b/spacy/lang/ml/__init__.py
index 166d0e061..cfad52261 100644
--- a/spacy/lang/ml/__init__.py
+++ b/spacy/lang/ml/__init__.py
@@ -1,8 +1,10 @@
 from .stop_words import STOP_WORDS
+from .lex_attrs import LEX_ATTRS
 from ...language import Language
 
 
 class MalayalamDefaults(Language.Defaults):
+    lex_attr_getters = LEX_ATTRS
     stop_words = STOP_WORDS
 
 
diff --git a/spacy/lang/ro/__init__.py b/spacy/lang/ro/__init__.py
index 74016d3e9..f0d8d8d31 100644
--- a/spacy/lang/ro/__init__.py
+++ b/spacy/lang/ro/__init__.py
@@ -2,6 +2,7 @@ from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
 from .stop_words import STOP_WORDS
 from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_INFIXES
 from .punctuation import TOKENIZER_SUFFIXES
+from .lex_attrs import LEX_ATTRS
 from ...language import Language
 
 # Lemma data note:
@@ -14,6 +15,7 @@ class RomanianDefaults(Language.Defaults):
     prefixes = TOKENIZER_PREFIXES
     suffixes = TOKENIZER_SUFFIXES
     infixes = TOKENIZER_INFIXES
+    lex_attr_getters = LEX_ATTRS
     stop_words = STOP_WORDS
 
 
diff --git a/spacy/lang/tr/__init__.py b/spacy/lang/tr/__init__.py
index 70b277487..8bd0b93df 100644
--- a/spacy/lang/tr/__init__.py
+++ b/spacy/lang/tr/__init__.py
@@ -1,10 +1,12 @@
 from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
 from .stop_words import STOP_WORDS
+from .lex_attrs import LEX_ATTRS
 from ...language import Language
 
 
 class TurkishDefaults(Language.Defaults):
     tokenizer_exceptions = TOKENIZER_EXCEPTIONS
+    lex_attr_getters = LEX_ATTRS
     stop_words = STOP_WORDS
 
 