added inflix rules

This commit is contained in:
Jim Geovedi 2017-07-24 06:17:07 +07:00
parent d0ec484725
commit 082e94ac1c

View File

@ -2,7 +2,7 @@
from __future__ import unicode_literals
from .stop_words import STOP_WORDS
from .punctuation import TOKENIZER_SUFFIXES, TOKENIZER_PREFIXES
from .punctuation import TOKENIZER_SUFFIXES, TOKENIZER_PREFIXES, TOKENIZER_INFIXES
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
from .norm_exceptions import NORM_EXCEPTIONS
from .lex_attrs import LEX_ATTRS
@ -23,6 +23,7 @@ class IndonesianDefaults(Language.Defaults):
stop_words = set(STOP_WORDS)
prefixes = tuple(TOKENIZER_PREFIXES)
suffixes = tuple(TOKENIZER_SUFFIXES)
infixes = tuple(TOKENIZER_INFIXES)
class Indonesian(Language):