From 2abd380f2d17010fe22fe52e8aab529d70cbeec6 Mon Sep 17 00:00:00 2001
From: pepemedigu
Date: Thu, 20 Jan 2022 15:44:13 +0100
Subject: [PATCH] Update lex_attrs.py for Spanish with ordinals (#10038)

* Update lex_attrs.py

Add ordinal words

* black formatting

Co-authored-by: Sofie Van Landeghem
---
 spacy/lang/es/lex_attrs.py | 41 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 40 insertions(+), 1 deletion(-)

diff --git a/spacy/lang/es/lex_attrs.py b/spacy/lang/es/lex_attrs.py
index 988dbaba1..9d1fa93b8 100644
--- a/spacy/lang/es/lex_attrs.py
+++ b/spacy/lang/es/lex_attrs.py
@@ -47,6 +47,41 @@ _num_words = [
 ]
 
 
+_ordinal_words = [
+    "primero",
+    "segundo",
+    "tercero",
+    "cuarto",
+    "quinto",
+    "sexto",
+    "séptimo",
+    "octavo",
+    "noveno",
+    "décimo",
+    "undécimo",
+    "duodécimo",
+    "decimotercero",
+    "decimocuarto",
+    "decimoquinto",
+    "decimosexto",
+    "decimoséptimo",
+    "decimoctavo",
+    "decimonoveno",
+    "vigésimo",
+    "trigésimo",
+    "cuadragésimo",
+    "quincuagésimo",
+    "sexagésimo",
+    "septuagésimo",
+    "octogésima",
+    "nonagésima",
+    "centésima",
+    "milésima",
+    "millonésima",
+    "billonésima",
+]
+
+
 def like_num(text):
     if text.startswith(("+", "-", "±", "~")):
         text = text[1:]
@@ -57,7 +92,11 @@ def like_num(text):
         num, denom = text.split("/")
         if num.isdigit() and denom.isdigit():
             return True
-    if text.lower() in _num_words:
+    text_lower = text.lower()
+    if text_lower in _num_words:
+        return True
+    # Check ordinal number
+    if text_lower in _ordinal_words:
         return True
     return False
 