mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
Remove Kyrgyz chars from char_classes since the Tatar ones already cover them
This commit is contained in:
parent
7d0154a36e
commit
2b19ebad59
|
@@ -207,10 +207,6 @@ _tatar_lower = r"әөүҗңһ"
|
|||
_tatar_upper = r"ӘӨҮҖҢҺ"
|
||||
_tatar = r"әөүҗңһӘӨҮҖҢҺ"
|
||||
|
||||
_kyrgyz_lower = r"өңү"
|
||||
_kyrgyz_upper = r"ӨҢҮ"
|
||||
_kyrgyz = r"өңүӨҢҮ"
|
||||
|
||||
_greek_lower = r"α-ωάέίόώήύ"
|
||||
_greek_upper = r"Α-ΩΆΈΊΌΏΉΎ"
|
||||
_greek = r"α-ωάέίόώήύΑ-ΩΆΈΊΌΏΉΎ"
|
||||
|
@@ -223,8 +219,8 @@ _macedonian_lower = r"ѓѕјљњќѐѝ"
|
|||
_macedonian_upper = r"ЃЅЈЉЊЌЀЍ"
|
||||
_macedonian = r"ѓѕјљњќѐѝЃЅЈЉЊЌЀЍ"
|
||||
|
||||
_upper = LATIN_UPPER + _russian_upper + _tatar_upper + _kyrgyz_upper + _greek_upper + _ukrainian_upper + _macedonian_upper
|
||||
_lower = LATIN_LOWER + _russian_lower + _tatar_lower + _kyrgyz_lower + _greek_lower + _ukrainian_lower + _macedonian_lower
|
||||
_upper = LATIN_UPPER + _russian_upper + _tatar_upper + _greek_upper + _ukrainian_upper + _macedonian_upper
|
||||
_lower = LATIN_LOWER + _russian_lower + _tatar_lower + _greek_lower + _ukrainian_lower + _macedonian_lower
|
||||
|
||||
_uncased = (
|
||||
_ethiopic
|
||||
|
@@ -240,7 +236,7 @@ _uncased = (
|
|||
+ _cjk
|
||||
)
|
||||
|
||||
ALPHA = group_chars(LATIN + _russian + _tatar + _kyrgyz + _greek + _ukrainian + _macedonian + _uncased)
|
||||
ALPHA = group_chars(LATIN + _russian + _tatar + _greek + _ukrainian + _macedonian + _uncased)
|
||||
ALPHA_LOWER = group_chars(_lower + _uncased)
|
||||
ALPHA_UPPER = group_chars(_upper + _uncased)
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user