Fix Azerbaijani init, extend lang init tests (#8656)

* Extend langs in initialize tests

* Fix az init
This commit is contained in:
Adriane Boyd 2021-07-09 15:36:35 +02:00 committed by GitHub
parent 1c0ed22d1e
commit b8e720fdb9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 7 additions and 11 deletions

View File

@@ -1,16 +1,11 @@
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS, TOKEN_MATCH
from .stop_words import STOP_WORDS
from .syntax_iterators import SYNTAX_ITERATORS
from .lex_attrs import LEX_ATTRS
from ...language import Language
class AzerbaijaniDefaults(Language.Defaults):
    """Azerbaijani-specific overrides of ``Language.Defaults``.

    Each attribute below is bound to a constant imported from a sibling
    module of this package (see the import lines at the top of the file).
    """

    # NOTE(review): this listing was recovered from a diff view whose +/-
    # markers were lost. The hunk header (16 -> 11 lines) says lines were
    # removed in this region, so some of these attributes (and the imports
    # they rely on) may no longer exist after the commit -- confirm against
    # the repository before relying on any of them.
    tokenizer_exceptions = TOKENIZER_EXCEPTIONS
    lex_attr_getters = LEX_ATTRS
    stop_words = STOP_WORDS
    token_match = TOKEN_MATCH
    syntax_iterators = SYNTAX_ITERATORS
class Azerbaijani(Language):

View File

@@ -4,12 +4,13 @@ from spacy.util import get_lang_class
# fmt: off
# Only include languages with no external dependencies
# excluded: ja, ru, th, uk, vi, zh
# NOTE(review): this appears to be the pre-change side of the hunk
# ("-4,12 +4,13"). The LANGUAGES assignment further down rebinds the same
# name, so if both statements ran in order this value would be discarded --
# confirm which of the two lists the file actually contains.
LANGUAGES = ["af", "ar", "bg", "bn", "ca", "cs", "da", "de", "el", "en", "es",
             "et", "fa", "fi", "fr", "ga", "he", "hi", "hr", "hu", "id", "is",
             "it", "kn", "lt", "lv", "nb", "nl", "pl", "pt", "ro", "si", "sk",
             "sl", "sq", "sr", "sv", "ta", "te", "tl", "tn", "tr", "tt", "ur",
             "yo"]
# excluded: ja, ko, th, vi, zh
# Language codes listed for this module. Per the comment at the top of this
# hunk, only languages with no external dependencies belong here; the codes
# named on the "excluded" line above are deliberately left out.
# NOTE(review): presumably each code is resolved with get_lang_class (named
# in the hunk header) by the initialization tests -- confirm against the
# rest of the file.
LANGUAGES = ["af", "am", "ar", "az", "bg", "bn", "ca", "cs", "da", "de", "el",
             "en", "es", "et", "eu", "fa", "fi", "fr", "ga", "gu", "he", "hi",
             "hr", "hu", "hy", "id", "is", "it", "kn", "ky", "lb", "lt", "lv",
             "mk", "ml", "mr", "nb", "ne", "nl", "pl", "pt", "ro", "ru", "sa",
             "si", "sk", "sl", "sq", "sr", "sv", "ta", "te", "ti", "tl", "tn",
             "tr", "tt", "uk", "ur", "xx", "yo"]
# fmt: on