Update language data

This commit is contained in:
Ines Montani 2021-01-27 13:29:22 +11:00
parent 230e651ad6
commit e3f8be9a94
6 changed files with 7 additions and 31 deletions

View File

@@ -1,25 +1,14 @@
-# coding: utf8
-from __future__ import unicode_literals
-
 from .lex_attrs import LEX_ATTRS
 from .punctuation import TOKENIZER_INFIXES
 from .stop_words import STOP_WORDS
 from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
-from ...attrs import LANG
 from ...language import Language
-from ...util import update_exc
 
 
 class KyrgyzDefaults(Language.Defaults):
-    lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
-    lex_attr_getters[LANG] = lambda text: "ky"
-    lex_attr_getters.update(LEX_ATTRS)
-    tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
-    infixes = tuple(TOKENIZER_INFIXES)
+    tokenizer_exceptions = TOKENIZER_EXCEPTIONS
+    infixes = TOKENIZER_INFIXES
+    lex_attr_getters = LEX_ATTRS
     stop_words = STOP_WORDS

View File

@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
 """
 Example sentences to test spaCy and its language models.
 
 >>> from spacy.lang.ky.examples import sentences

View File

@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
 from ...attrs import LIKE_NUM
 
 _num_words = [

View File

@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
 from ..char_classes import ALPHA, ALPHA_LOWER, ALPHA_UPPER, CONCAT_QUOTES, HYPHENS
 from ..char_classes import LIST_ELLIPSES, LIST_ICONS

View File

@@ -1,8 +1,5 @@
-# encoding: utf8
-from __future__ import unicode_literals
-
 STOP_WORDS = set(
     """
 ага адам айтты айтымында айтып ал алар
 алардын алган алуу алып анда андан аны
 анын ар

View File

@@ -1,7 +1,6 @@
-# coding: utf8
-from __future__ import unicode_literals
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
 from ...symbols import ORTH, LEMMA, NORM
+from ...util import update_exc
 
 _exc = {}
@@ -52,4 +51,4 @@ for exc_data in [  # "etc." abbreviations
     exc_data[LEMMA] = exc_data[NORM]
     _exc[exc_data[ORTH]] = [exc_data]
 
-TOKENIZER_EXCEPTIONS = _exc
+TOKENIZER_EXCEPTIONS = update_exc(BASE_EXCEPTIONS, _exc)