mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 09:26:27 +03:00
Update lex_attrs.py for Spanish with ordinals (#10038)
* Update lex_attrs.py Add ordinal words * black formatting Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
This commit is contained in:
parent
d2afdfefc2
commit
2abd380f2d
|
@ -47,6 +47,41 @@ _num_words = [
|
|||
]
|
||||
|
||||
|
||||
# Spanish ordinal words, in lowercase; a token whose lowercased text appears
# here is treated as number-like by like_num().
# The ordinals from "octogésimo" upwards are listed in both masculine and
# feminine form: the feminine spellings were the only ones present originally
# and are kept so nothing that matched before stops matching, and the
# masculine spellings are added for consistency with the entries before them.
_ordinal_words = [
    "primero",
    "segundo",
    "tercero",
    "cuarto",
    "quinto",
    "sexto",
    "séptimo",
    "octavo",
    "noveno",
    "décimo",
    "undécimo",
    "duodécimo",
    "decimotercero",
    "decimocuarto",
    "decimoquinto",
    "decimosexto",
    "decimoséptimo",
    "decimoctavo",
    "decimonoveno",
    "vigésimo",
    "trigésimo",
    "cuadragésimo",
    "quincuagésimo",
    "sexagésimo",
    "septuagésimo",
    "octogésimo",
    "octogésima",
    "nonagésimo",
    "nonagésima",
    "centésimo",
    "centésima",
    "milésimo",
    "milésima",
    "millonésimo",
    "millonésima",
    "billonésimo",
    "billonésima",
]
|
||||
|
||||
|
||||
def like_num(text):
|
||||
if text.startswith(("+", "-", "±", "~")):
|
||||
text = text[1:]
|
||||
|
@ -57,7 +92,11 @@ def like_num(text):
|
|||
num, denom = text.split("/")
|
||||
if num.isdigit() and denom.isdigit():
|
||||
return True
|
||||
if text.lower() in _num_words:
|
||||
text_lower = text.lower()
|
||||
if text_lower in _num_words:
|
||||
return True
|
||||
# Check ordinal number
|
||||
if text_lower in _ordinal_words:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user