Fix/fix en ordinals (#8028)

* Fix #8019

"th" is not the only ordinal ending.

* Add some more ordinal tests
This commit is contained in:
Paul O'Leary McCann 2021-05-07 17:26:42 +09:00 committed by GitHub
parent 71c2a3ab47
commit bdeaf3a18b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 4 additions and 2 deletions

View File

@@ -35,7 +35,7 @@ def like_num(text: str) -> bool:
# Check ordinal number
if text_lower in _ordinal_words:
return True
-if text_lower.endswith("th"):
+if text_lower.endswith(("st", "nd", "rd", "th")):
if text_lower[:-2].isdigit():
return True
return False

View File

@@ -56,7 +56,9 @@ def test_lex_attrs_like_number(en_tokenizer, text, match):
assert tokens[0].like_num == match
-@pytest.mark.parametrize("word", ["third", "Millionth", "100th", "Hundredth"])
+@pytest.mark.parametrize(
+"word", ["third", "Millionth", "100th", "Hundredth", "23rd", "52nd"]
+)
def test_en_lex_attrs_like_number_for_ordinal(word):
assert like_num(word)