mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-24 17:06:29 +03:00
English: adds ordinal numbers (#5830)
This commit is contained in:
parent
90b958fd01
commit
f76fae0e8d
|
@ -44,6 +44,44 @@ _num_words = [
|
|||
]
|
||||
|
||||
|
||||
_ordinal_words = [
|
||||
"first",
|
||||
"second",
|
||||
"third",
|
||||
"fourth",
|
||||
"fifth",
|
||||
"sixth",
|
||||
"seventh",
|
||||
"eighth",
|
||||
"ninth",
|
||||
"tenth",
|
||||
"eleventh",
|
||||
"twelfth",
|
||||
"thirteenth",
|
||||
"fourteenth",
|
||||
"fifteenth",
|
||||
"sixteenth",
|
||||
"seventeenth",
|
||||
"eighteenth",
|
||||
"nineteenth",
|
||||
"twentieth",
|
||||
"thirtieth",
|
||||
"fortieth",
|
||||
"fiftieth",
|
||||
"sixtieth",
|
||||
"seventieth",
|
||||
"eightieth",
|
||||
"ninetieth",
|
||||
"hundredth",
|
||||
"thousandth",
|
||||
"millionth",
|
||||
"billionth",
|
||||
"trillionth",
|
||||
"quadrillionth",
|
||||
"gajillionth",
|
||||
"bazillionth",
|
||||
]
|
||||
|
||||
def like_num(text):
|
||||
if text.startswith(("+", "-", "±", "~")):
|
||||
text = text[1:]
|
||||
|
@ -54,8 +92,18 @@ def like_num(text):
|
|||
num, denom = text.split("/")
|
||||
if num.isdigit() and denom.isdigit():
|
||||
return True
|
||||
if text.lower() in _num_words:
|
||||
|
||||
text_lower = text.lower()
|
||||
if text_lower in _num_words:
|
||||
return True
|
||||
|
||||
# Check ordinal number
|
||||
if text_lower in _ordinal_words:
|
||||
return True
|
||||
if text_lower.endswith("th"):
|
||||
if text_lower[:-2].isdigit():
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
|
|
|
@ -61,6 +61,19 @@ def test_lex_attrs_like_number(en_tokenizer, text, match):
|
|||
assert tokens[0].like_num == match
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"word",
|
||||
[
|
||||
"third",
|
||||
"Millionth",
|
||||
"100th",
|
||||
"Hundredth",
|
||||
]
|
||||
)
|
||||
def test_en_lex_attrs_like_number_for_ordinal(word):
|
||||
assert like_num(word)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("word", ["eleven"])
|
||||
def test_en_lex_attrs_capitals(word):
|
||||
assert like_num(word)
|
||||
|
|
Loading…
Reference in New Issue
Block a user