Indonesian: enable the lookup lemmatizer and syntax iterators.

Imports LOOKUP, SYNTAX_ITERATORS and Lemmatizer into
spacy/lang/id/__init__.py, sets IndonesianDefaults.syntax_iterators, and
adds a create_lemmatizer classmethod that returns Lemmatizer(LOOKUP).

diff --git a/spacy/lang/id/__init__.py b/spacy/lang/id/__init__.py
index f8d90903f..a65a5b24f 100644
--- a/spacy/lang/id/__init__.py
+++ b/spacy/lang/id/__init__.py
@@ -5,10 +5,13 @@ from .stop_words import STOP_WORDS
 from .punctuation import TOKENIZER_SUFFIXES, TOKENIZER_PREFIXES, TOKENIZER_INFIXES
 from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS, TOKEN_MATCH
 from .norm_exceptions import NORM_EXCEPTIONS
+from .lemmatizer import LOOKUP
 from .lex_attrs import LEX_ATTRS
+from .syntax_iterators import SYNTAX_ITERATORS
 
 from ..tokenizer_exceptions import BASE_EXCEPTIONS
 from ...language import Language
+from ...lemmatizerlookup import Lemmatizer
 from ...attrs import LANG
 from ...util import update_exc
 
@@ -25,6 +28,11 @@ class IndonesianDefaults(Language.Defaults):
     prefixes = tuple(TOKENIZER_PREFIXES)
     suffixes = tuple(TOKENIZER_SUFFIXES)
     infixes = tuple(TOKENIZER_INFIXES)
+    syntax_iterators = dict(SYNTAX_ITERATORS)
+
+    @classmethod
+    def create_lemmatizer(cls, nlp=None):
+        return Lemmatizer(LOOKUP)
 
 
 class Indonesian(Language):