spaCy/spacy/lang/ms/__init__.py
Sani 873c16a4df
Malay language support (#12602)
* add malay lang

* fix token len

* black format

* reformat conftest malay

* remove exceptions not exist in dbp

* format code
2023-05-17 12:45:21 +02:00

25 lines
678 B
Python

from .stop_words import STOP_WORDS
from .punctuation import TOKENIZER_SUFFIXES, TOKENIZER_PREFIXES, TOKENIZER_INFIXES
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
from .lex_attrs import LEX_ATTRS
from .syntax_iterators import SYNTAX_ITERATORS
from ...language import Language, BaseDefaults
class MalayDefaults(BaseDefaults):
tokenizer_exceptions = TOKENIZER_EXCEPTIONS
prefixes = TOKENIZER_PREFIXES
suffixes = TOKENIZER_SUFFIXES
infixes = TOKENIZER_INFIXES
syntax_iterators = SYNTAX_ITERATORS
lex_attr_getters = LEX_ATTRS
stop_words = STOP_WORDS
class Malay(Language):
lang = "ms"
Defaults = MalayDefaults
__all__ = ["Malay"]