Add Kyrgyz letters to the spaCy character classes.

Defines _kyrgyz_lower / _kyrgyz_upper / _kyrgyz and appends them to the
_upper and _lower concatenations and to the ALPHA group.

diff --git a/spacy/lang/char_classes.py b/spacy/lang/char_classes.py
index 3fb0fb41e..d876d375a 100644
--- a/spacy/lang/char_classes.py
+++ b/spacy/lang/char_classes.py
@@ -207,6 +207,10 @@ _tatar_lower = r"әөүҗңһ"
 _tatar_upper = r"ӘӨҮҖҢҺ"
 _tatar = r"әөүҗңһӘӨҮҖҢҺ"
 
+_kyrgyz_lower = r"өңү"
+_kyrgyz_upper = r"ӨҢҮ"
+_kyrgyz = r"өңүӨҢҮ"
+
 _greek_lower = r"α-ωάέίόώήύ"
 _greek_upper = r"Α-ΩΆΈΊΌΏΉΎ"
 _greek = r"α-ωάέίόώήύΑ-ΩΆΈΊΌΏΉΎ"
@@ -219,8 +223,8 @@ _macedonian_lower = r"ѓѕјљњќѐѝ"
 _macedonian_upper = r"ЃЅЈЉЊЌЀЍ"
 _macedonian = r"ѓѕјљњќѐѝЃЅЈЉЊЌЀЍ"
 
-_upper = LATIN_UPPER + _russian_upper + _tatar_upper + _greek_upper + _ukrainian_upper + _macedonian_upper
-_lower = LATIN_LOWER + _russian_lower + _tatar_lower + _greek_lower + _ukrainian_lower + _macedonian_lower
+_upper = LATIN_UPPER + _russian_upper + _tatar_upper + _kyrgyz_upper + _greek_upper + _ukrainian_upper + _macedonian_upper
+_lower = LATIN_LOWER + _russian_lower + _tatar_lower + _kyrgyz_lower + _greek_lower + _ukrainian_lower + _macedonian_lower
 
 _uncased = (
     _ethiopic
@@ -236,7 +240,7 @@ _uncased = (
     + _cjk
 )
 
-ALPHA = group_chars(LATIN + _russian + _tatar + _greek + _ukrainian + _macedonian + _uncased)
+ALPHA = group_chars(LATIN + _russian + _tatar + _kyrgyz + _greek + _ukrainian + _macedonian + _uncased)
 ALPHA_LOWER = group_chars(_lower + _uncased)
 ALPHA_UPPER = group_chars(_upper + _uncased)
 