From bbc75da38d4b3d7e65f992c18be0d55983f74d49 Mon Sep 17 00:00:00 2001
From: Jim Geovedi
Date: Thu, 27 Jul 2017 10:51:15 +0700
Subject: [PATCH] enable syntax iterator and lemma lookup

---
 spacy/lang/id/__init__.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/spacy/lang/id/__init__.py b/spacy/lang/id/__init__.py
index f8d90903f..a65a5b24f 100644
--- a/spacy/lang/id/__init__.py
+++ b/spacy/lang/id/__init__.py
@@ -5,10 +5,13 @@ from .stop_words import STOP_WORDS
 from .punctuation import TOKENIZER_SUFFIXES, TOKENIZER_PREFIXES, TOKENIZER_INFIXES
 from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS, TOKEN_MATCH
 from .norm_exceptions import NORM_EXCEPTIONS
+from .lemmatizer import LOOKUP
 from .lex_attrs import LEX_ATTRS
+from .syntax_iterators import SYNTAX_ITERATORS
 
 from ..tokenizer_exceptions import BASE_EXCEPTIONS
 from ...language import Language
+from ...lemmatizerlookup import Lemmatizer
 from ...attrs import LANG
 from ...util import update_exc
 
@@ -25,6 +28,11 @@ class IndonesianDefaults(Language.Defaults):
     prefixes = tuple(TOKENIZER_PREFIXES)
     suffixes = tuple(TOKENIZER_SUFFIXES)
     infixes = tuple(TOKENIZER_INFIXES)
+    syntax_iterators = dict(SYNTAX_ITERATORS)
+
+    @classmethod
+    def create_lemmatizer(cls, nlp=None):
+        return Lemmatizer(LOOKUP)
 
 
 class Indonesian(Language):