mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 02:06:31 +03:00
Fix/fix en ordinals (#8028)
* Fix #8019: "th" is not the only ordinal ending.
* Add some more ordinal tests.
This commit is contained in:
parent
71c2a3ab47
commit
bdeaf3a18b
|
@ -35,7 +35,7 @@ def like_num(text: str) -> bool:
|
||||||
# Check ordinal number
|
# Check ordinal number
|
||||||
if text_lower in _ordinal_words:
|
if text_lower in _ordinal_words:
|
||||||
return True
|
return True
|
||||||
if text_lower.endswith("th"):
|
if text_lower.endswith(("st", "nd", "rd", "th")):
|
||||||
if text_lower[:-2].isdigit():
|
if text_lower[:-2].isdigit():
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
|
@ -56,7 +56,9 @@ def test_lex_attrs_like_number(en_tokenizer, text, match):
|
||||||
assert tokens[0].like_num == match
|
assert tokens[0].like_num == match
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("word", ["third", "Millionth", "100th", "Hundredth"])
|
@pytest.mark.parametrize(
|
||||||
|
"word", ["third", "Millionth", "100th", "Hundredth", "23rd", "52nd"]
|
||||||
|
)
|
||||||
def test_en_lex_attrs_like_number_for_ordinal(word):
|
def test_en_lex_attrs_like_number_for_ordinal(word):
|
||||||
assert like_num(word)
|
assert like_num(word)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user