mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
Auto-format [ci skip]
This commit is contained in:
parent
db16059f9b
commit
d165af26be
|
@ -74,7 +74,7 @@ _eleven_to_beyond = [
|
||||||
"बावन",
|
"बावन",
|
||||||
"तिरपन", "तिरेपन",
|
"तिरपन", "तिरेपन",
|
||||||
"चौवन", "चउवन",
|
"चौवन", "चउवन",
|
||||||
"पचपन",
|
"पचपन",
|
||||||
"छप्पन",
|
"छप्पन",
|
||||||
"सतावन", "सत्तावन",
|
"सतावन", "सत्तावन",
|
||||||
"अठावन",
|
"अठावन",
|
||||||
|
@ -91,7 +91,7 @@ _eleven_to_beyond = [
|
||||||
"उनहत्तर",
|
"उनहत्तर",
|
||||||
"सत्तर",
|
"सत्तर",
|
||||||
"इकहत्तर"
|
"इकहत्तर"
|
||||||
"बहत्तर",
|
"बहत्तर",
|
||||||
"तिहत्तर",
|
"तिहत्तर",
|
||||||
"चौहत्तर",
|
"चौहत्तर",
|
||||||
"पचहत्तर",
|
"पचहत्तर",
|
||||||
|
@ -144,6 +144,7 @@ _ordinal_words_one_to_ten = [
|
||||||
_ordinal_suffix = "वाँ"
|
_ordinal_suffix = "वाँ"
|
||||||
# fmt: on
|
# fmt: on
|
||||||
|
|
||||||
|
|
||||||
def norm(string):
|
def norm(string):
|
||||||
# normalise base exceptions, e.g. punctuation or currency symbols
|
# normalise base exceptions, e.g. punctuation or currency symbols
|
||||||
if string in BASE_NORMS:
|
if string in BASE_NORMS:
|
||||||
|
@ -180,7 +181,7 @@ def like_num(text):
|
||||||
if text in _ordinal_words_one_to_ten:
|
if text in _ordinal_words_one_to_ten:
|
||||||
return True
|
return True
|
||||||
if text.endswith(_ordinal_suffix):
|
if text.endswith(_ordinal_suffix):
|
||||||
if text[:-len(_ordinal_suffix)] in _eleven_to_beyond:
|
if text[: -len(_ordinal_suffix)] in _eleven_to_beyond:
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
|
@ -297,7 +297,7 @@ def zh_tokenizer_pkuseg():
|
||||||
"segmenter": "pkuseg",
|
"segmenter": "pkuseg",
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"initialize": {"tokenizer": {"pkuseg_model": "web",}},
|
"initialize": {"tokenizer": {"pkuseg_model": "web"}},
|
||||||
}
|
}
|
||||||
nlp = get_lang_class("zh").from_config(config)
|
nlp = get_lang_class("zh").from_config(config)
|
||||||
nlp.initialize()
|
nlp.initialize()
|
||||||
|
|
|
@ -28,14 +28,16 @@ def test_hi_norm(word, word_norm):
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
"word", ["१९८७", "1987", "१२,२६७", "उन्नीस", "पाँच", "नवासी", "५/१०"],
|
"word",
|
||||||
|
["१९८७", "1987", "१२,२६७", "उन्नीस", "पाँच", "नवासी", "५/१०"],
|
||||||
)
|
)
|
||||||
def test_hi_like_num(word):
|
def test_hi_like_num(word):
|
||||||
assert like_num(word)
|
assert like_num(word)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
"word", ["पहला", "तृतीय", "निन्यानवेवाँ", "उन्नीस", "तिहत्तरवाँ", "छत्तीसवाँ",],
|
"word",
|
||||||
|
["पहला", "तृतीय", "निन्यानवेवाँ", "उन्नीस", "तिहत्तरवाँ", "छत्तीसवाँ"],
|
||||||
)
|
)
|
||||||
def test_hi_like_num_ordinal_words(word):
|
def test_hi_like_num_ordinal_words(word):
|
||||||
assert like_num(word)
|
assert like_num(word)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user