mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 01:16:28 +03:00
Auto-format [ci skip]
This commit is contained in:
parent
db16059f9b
commit
d165af26be
|
@ -74,7 +74,7 @@ _eleven_to_beyond = [
|
|||
"बावन",
|
||||
"तिरपन", "तिरेपन",
|
||||
"चौवन", "चउवन",
|
||||
"पचपन",
|
||||
"पचपन",
|
||||
"छप्पन",
|
||||
"सतावन", "सत्तावन",
|
||||
"अठावन",
|
||||
|
@ -91,7 +91,7 @@ _eleven_to_beyond = [
|
|||
"उनहत्तर",
|
||||
"सत्तर",
|
||||
"इकहत्तर"
|
||||
"बहत्तर",
|
||||
"बहत्तर",
|
||||
"तिहत्तर",
|
||||
"चौहत्तर",
|
||||
"पचहत्तर",
|
||||
|
@ -144,6 +144,7 @@ _ordinal_words_one_to_ten = [
|
|||
_ordinal_suffix = "वाँ"
|
||||
# fmt: on
|
||||
|
||||
|
||||
def norm(string):
|
||||
# normalise base exceptions, e.g. punctuation or currency symbols
|
||||
if string in BASE_NORMS:
|
||||
|
@ -180,7 +181,7 @@ def like_num(text):
|
|||
if text in _ordinal_words_one_to_ten:
|
||||
return True
|
||||
if text.endswith(_ordinal_suffix):
|
||||
if text[:-len(_ordinal_suffix)] in _eleven_to_beyond:
|
||||
if text[: -len(_ordinal_suffix)] in _eleven_to_beyond:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
|
|
@ -297,7 +297,7 @@ def zh_tokenizer_pkuseg():
|
|||
"segmenter": "pkuseg",
|
||||
}
|
||||
},
|
||||
"initialize": {"tokenizer": {"pkuseg_model": "web",}},
|
||||
"initialize": {"tokenizer": {"pkuseg_model": "web"}},
|
||||
}
|
||||
nlp = get_lang_class("zh").from_config(config)
|
||||
nlp.initialize()
|
||||
|
|
|
@ -28,14 +28,16 @@ def test_hi_norm(word, word_norm):
|
|||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"word", ["१९८७", "1987", "१२,२६७", "उन्नीस", "पाँच", "नवासी", "५/१०"],
|
||||
"word",
|
||||
["१९८७", "1987", "१२,२६७", "उन्नीस", "पाँच", "नवासी", "५/१०"],
|
||||
)
|
||||
def test_hi_like_num(word):
|
||||
assert like_num(word)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"word", ["पहला", "तृतीय", "निन्यानवेवाँ", "उन्नीस", "तिहत्तरवाँ", "छत्तीसवाँ",],
|
||||
"word",
|
||||
["पहला", "तृतीय", "निन्यानवेवाँ", "उन्नीस", "तिहत्तरवाँ", "छत्तीसवाँ"],
|
||||
)
|
||||
def test_hi_like_num_ordinal_words(word):
|
||||
assert like_num(word)
|
||||
|
|
Loading…
Reference in New Issue
Block a user