mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-24 00:46:28 +03:00
Fix/fix en ordinals (#8028)
* Fix #8019: "th" is not the only ordinal ending.
* Add some more ordinal tests.
This commit is contained in:
parent
71c2a3ab47
commit
bdeaf3a18b
|
@ -35,7 +35,7 @@ def like_num(text: str) -> bool:
|
|||
# Check ordinal number
|
||||
if text_lower in _ordinal_words:
|
||||
return True
|
||||
if text_lower.endswith("th"):
|
||||
if text_lower.endswith(("st", "nd", "rd", "th")):
|
||||
if text_lower[:-2].isdigit():
|
||||
return True
|
||||
return False
|
||||
|
|
|
@ -56,7 +56,9 @@ def test_lex_attrs_like_number(en_tokenizer, text, match):
|
|||
assert tokens[0].like_num == match
|
||||
|
||||
|
||||
@pytest.mark.parametrize("word", ["third", "Millionth", "100th", "Hundredth"])
|
||||
@pytest.mark.parametrize(
|
||||
"word", ["third", "Millionth", "100th", "Hundredth", "23rd", "52nd"]
|
||||
)
|
||||
def test_en_lex_attrs_like_number_for_ordinal(word):
|
||||
assert like_num(word)
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user