Add missing lex_attr_getters (resolves #5806)

Ines Montani 2020-07-25 12:55:18 +02:00
parent c003d26b94
commit 4a0a692875
4 changed files with 8 additions and 0 deletions


@@ -3,6 +3,7 @@ from thinc.api import Config
 from .stop_words import STOP_WORDS
 from .tag_map import TAG_MAP
+from .lex_attrs import LEX_ATTRS
 from ...language import Language
 from ...tokens import Doc
 from ...compat import copy_reg
@@ -64,6 +65,7 @@ class KoreanTokenizer(DummyTokenizer):
 class KoreanDefaults(Language.Defaults):
     config = Config().from_str(DEFAULT_CONFIG)
+    lex_attr_getters = LEX_ATTRS
     stop_words = STOP_WORDS
     writing_system = {"direction": "ltr", "has_case": False, "has_letters": False}


@@ -1,8 +1,10 @@
 from .stop_words import STOP_WORDS
+from .lex_attrs import LEX_ATTRS
 from ...language import Language
 class MalayalamDefaults(Language.Defaults):
+    lex_attr_getters = LEX_ATTRS
     stop_words = STOP_WORDS


@@ -2,6 +2,7 @@ from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
 from .stop_words import STOP_WORDS
 from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_INFIXES
 from .punctuation import TOKENIZER_SUFFIXES
+from .lex_attrs import LEX_ATTRS
 from ...language import Language
 # Lemma data note:
@@ -14,6 +15,7 @@ class RomanianDefaults(Language.Defaults):
     prefixes = TOKENIZER_PREFIXES
     suffixes = TOKENIZER_SUFFIXES
     infixes = TOKENIZER_INFIXES
+    lex_attr_getters = LEX_ATTRS
     stop_words = STOP_WORDS


@@ -1,10 +1,12 @@
 from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
 from .stop_words import STOP_WORDS
+from .lex_attrs import LEX_ATTRS
 from ...language import Language
 class TurkishDefaults(Language.Defaults):
     tokenizer_exceptions = TOKENIZER_EXCEPTIONS
+    lex_attr_getters = LEX_ATTRS
     stop_words = STOP_WORDS
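
For context, lex_attr_getters on a Language.Defaults subclass is a dict mapping lexical attribute IDs to getter functions that spaCy applies to each lexeme of that language, and the LEX_ATTRS dicts imported above come from each language's lex_attrs.py. Below is a minimal sketch of what such a module typically provides, using spacy.attrs.LIKE_NUM as the attribute; the word list and number heuristics are illustrative placeholders, not the actual Korean, Malayalam, Romanian, or Turkish data.

# lex_attrs.py sketch (hypothetical data; the structure mirrors spaCy's lang packages)
from spacy.attrs import LIKE_NUM  # inside spacy/lang/xx/ this would be the relative "from ...attrs import LIKE_NUM"

# Illustrative number words only; each language ships its own list.
_num_words = ["zero", "one", "two", "three", "ten", "hundred", "thousand", "million"]


def like_num(text):
    # Accept an optional leading sign, digit groups like "10,000" or "3.5",
    # simple fractions like "1/2", and the spelled-out words above.
    if text.startswith(("+", "-", "±", "~")):
        text = text[1:]
    text = text.replace(",", "").replace(".", "")
    if text.isdigit():
        return True
    if text.count("/") == 1:
        num, denom = text.split("/")
        if num.isdigit() and denom.isdigit():
            return True
    return text.lower() in _num_words


# Language.Defaults.lex_attr_getters expects {attribute ID: getter function};
# this dict is what each of the four diffs above now assigns as lex_attr_getters.
LEX_ATTRS = {LIKE_NUM: like_num}

With LEX_ATTRS wired in, lexeme and token attributes such as like_num are computed with the language-specific getters instead of the generic defaults, which is what was missing for these four languages.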