Auto-format [ci skip]

This commit is contained in:
Ines Montani 2020-10-15 10:08:53 +02:00
parent db16059f9b
commit d165af26be
3 changed files with 9 additions and 6 deletions

View File

@@ -74,7 +74,7 @@ _eleven_to_beyond = [
"बावन", "बावन",
"तिरपन", "तिरेपन", "तिरपन", "तिरेपन",
"चौवन", "चउवन", "चौवन", "चउवन",
"पचपन", "पचपन",
"छप्पन", "छप्पन",
"सतावन", "सत्तावन", "सतावन", "सत्तावन",
"अठावन", "अठावन",
@@ -91,7 +91,7 @@ _eleven_to_beyond = [
"उनहत्तर", "उनहत्तर",
"सत्तर", "सत्तर",
"इकहत्तर" "इकहत्तर"
"बहत्तर", "बहत्तर",
"तिहत्तर", "तिहत्तर",
"चौहत्तर", "चौहत्तर",
"पचहत्तर", "पचहत्तर",
@@ -144,6 +144,7 @@ _ordinal_words_one_to_ten = [
_ordinal_suffix = "वाँ" _ordinal_suffix = "वाँ"
# fmt: on # fmt: on
def norm(string): def norm(string):
# normalise base exceptions, e.g. punctuation or currency symbols # normalise base exceptions, e.g. punctuation or currency symbols
if string in BASE_NORMS: if string in BASE_NORMS:
@@ -180,7 +181,7 @@ def like_num(text):
if text in _ordinal_words_one_to_ten: if text in _ordinal_words_one_to_ten:
return True return True
if text.endswith(_ordinal_suffix): if text.endswith(_ordinal_suffix):
if text[:-len(_ordinal_suffix)] in _eleven_to_beyond: if text[: -len(_ordinal_suffix)] in _eleven_to_beyond:
return True return True
return False return False

View File

@@ -297,7 +297,7 @@ def zh_tokenizer_pkuseg():
"segmenter": "pkuseg", "segmenter": "pkuseg",
} }
}, },
"initialize": {"tokenizer": {"pkuseg_model": "web",}}, "initialize": {"tokenizer": {"pkuseg_model": "web"}},
} }
nlp = get_lang_class("zh").from_config(config) nlp = get_lang_class("zh").from_config(config)
nlp.initialize() nlp.initialize()

View File

@@ -28,14 +28,16 @@ def test_hi_norm(word, word_norm):
@pytest.mark.parametrize( @pytest.mark.parametrize(
"word", ["१९८७", "1987", "१२,२६७", "उन्नीस", "पाँच", "नवासी", "५/१०"], "word",
["१९८७", "1987", "१२,२६७", "उन्नीस", "पाँच", "नवासी", "५/१०"],
) )
def test_hi_like_num(word): def test_hi_like_num(word):
assert like_num(word) assert like_num(word)
@pytest.mark.parametrize( @pytest.mark.parametrize(
"word", ["पहला", "तृतीय", "निन्यानवेवाँ", "उन्नीस", "तिहत्तरवाँ", "छत्तीसवाँ",], "word",
["पहला", "तृतीय", "निन्यानवेवाँ", "उन्नीस", "तिहत्तरवाँ", "छत्तीसवाँ"],
) )
def test_hi_like_num_ordinal_words(word): def test_hi_like_num_ordinal_words(word):
assert like_num(word) assert like_num(word)