Auto-format [ci skip]

This commit is contained in:
Ines Montani 2020-10-15 10:08:53 +02:00
parent db16059f9b
commit d165af26be
3 changed files with 9 additions and 6 deletions

View File

@ -74,7 +74,7 @@ _eleven_to_beyond = [
"बावन",
"तिरपन", "तिरेपन",
"चौवन", "चउवन",
"पचपन",
"पचपन",
"छप्पन",
"सतावन", "सत्तावन",
"अठावन",
@ -91,7 +91,7 @@ _eleven_to_beyond = [
"उनहत्तर",
"सत्तर",
"इकहत्तर",
"बहत्तर",
"बहत्तर",
"तिहत्तर",
"चौहत्तर",
"पचहत्तर",
@ -144,6 +144,7 @@ _ordinal_words_one_to_ten = [
_ordinal_suffix = "वाँ"
# fmt: on
def norm(string):
# normalise base exceptions, e.g. punctuation or currency symbols
if string in BASE_NORMS:
@ -180,7 +181,7 @@ def like_num(text):
if text in _ordinal_words_one_to_ten:
return True
if text.endswith(_ordinal_suffix):
if text[:-len(_ordinal_suffix)] in _eleven_to_beyond:
if text[: -len(_ordinal_suffix)] in _eleven_to_beyond:
return True
return False

View File

@ -297,7 +297,7 @@ def zh_tokenizer_pkuseg():
"segmenter": "pkuseg",
}
},
"initialize": {"tokenizer": {"pkuseg_model": "web",}},
"initialize": {"tokenizer": {"pkuseg_model": "web"}},
}
nlp = get_lang_class("zh").from_config(config)
nlp.initialize()

View File

@ -28,14 +28,16 @@ def test_hi_norm(word, word_norm):
@pytest.mark.parametrize(
"word", ["१९८७", "1987", "१२,२६७", "उन्नीस", "पाँच", "नवासी", "५/१०"],
"word",
["१९८७", "1987", "१२,२६७", "उन्नीस", "पाँच", "नवासी", "५/१०"],
)
def test_hi_like_num(word):
assert like_num(word)
@pytest.mark.parametrize(
"word", ["पहला", "तृतीय", "निन्यानवेवाँ", "उन्नीस", "तिहत्तरवाँ", "छत्तीसवाँ",],
"word",
["पहला", "तृतीय", "निन्यानवेवाँ", "उन्नीस", "तिहत्तरवाँ", "छत्तीसवाँ"],
)
def test_hi_like_num_ordinal_words(word):
assert like_num(word)