mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 01:16:28 +03:00
Update lex_attrs.py for Spanish with ordinals (#10038)
* Update lex_attrs.py: add ordinal words
* black formatting

Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
This commit is contained in:
parent
d2afdfefc2
commit
2abd380f2d
|
@ -47,6 +47,41 @@ _num_words = [
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
# Spanish ordinal number words, matched against the lower-cased token text.
# Spanish ordinals inflect for gender. The entries from "octogésima" onwards
# were originally listed in the feminine form only, while every earlier entry
# is masculine; the masculine counterparts are added here so both spellings
# are recognised, and the feminine forms are kept for backward compatibility.
_ordinal_words = [
    "primero",
    "segundo",
    "tercero",
    "cuarto",
    "quinto",
    "sexto",
    "séptimo",
    "octavo",
    "noveno",
    "décimo",
    "undécimo",
    "duodécimo",
    "decimotercero",
    "decimocuarto",
    "decimoquinto",
    "decimosexto",
    "decimoséptimo",
    "decimoctavo",
    "decimonoveno",
    "vigésimo",
    "trigésimo",
    "cuadragésimo",
    "quincuagésimo",
    "sexagésimo",
    "septuagésimo",
    "octogésimo",
    "octogésima",
    "nonagésimo",
    "nonagésima",
    "centésimo",
    "centésima",
    "milésimo",
    "milésima",
    "millonésimo",
    "millonésima",
    "billonésimo",
    "billonésima",
]
|
||||||
|
|
||||||
|
|
||||||
def like_num(text):
|
def like_num(text):
|
||||||
if text.startswith(("+", "-", "±", "~")):
|
if text.startswith(("+", "-", "±", "~")):
|
||||||
text = text[1:]
|
text = text[1:]
|
||||||
|
@ -57,7 +92,11 @@ def like_num(text):
|
||||||
num, denom = text.split("/")
|
num, denom = text.split("/")
|
||||||
if num.isdigit() and denom.isdigit():
|
if num.isdigit() and denom.isdigit():
|
||||||
return True
|
return True
|
||||||
if text.lower() in _num_words:
|
text_lower = text.lower()
|
||||||
|
if text_lower in _num_words:
|
||||||
|
return True
|
||||||
|
# Check ordinal number
|
||||||
|
if text_lower in _ordinal_words:
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user