From 148b036e0cae9eebb6968cea5ecede1ebc7205a0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Samuel=20Rodr=C3=ADguez=20Medina?=
 <samuelrod@protonmail.com>
Date: Thu, 30 Apr 2020 11:13:23 +0200
Subject: [PATCH] Spanish like num improvement (#5381)

* Add tests for Spanish like_num.

* Add the missing number words "veintiuno" to "veintinueve" to the Spanish lexical attributes used by like_num.

* Modify Spanish test function name.

* Add contributor agreement.
---
 spacy/lang/es/lex_attrs.py       |  9 +++++++++
 spacy/tests/lang/es/test_text.py | 30 ++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+)

diff --git a/spacy/lang/es/lex_attrs.py b/spacy/lang/es/lex_attrs.py
index 03ada1f43..632a638fc 100644
--- a/spacy/lang/es/lex_attrs.py
+++ b/spacy/lang/es/lex_attrs.py
@@ -26,6 +26,15 @@ _num_words = [
     "dieciocho",
     "diecinueve",
     "veinte",
+    "veintiuno",
+    "veintidós",
+    "veintitrés",
+    "veinticuatro",
+    "veinticinco",
+    "veintiséis",
+    "veintisiete",
+    "veintiocho",
+    "veintinueve",
     "treinta",
     "cuarenta",
     "cincuenta",
diff --git a/spacy/tests/lang/es/test_text.py b/spacy/tests/lang/es/test_text.py
index acd572b48..e237f922d 100644
--- a/spacy/tests/lang/es/test_text.py
+++ b/spacy/tests/lang/es/test_text.py
@@ -2,6 +2,7 @@
 from __future__ import unicode_literals
 
 import pytest
+from spacy.lang.es.lex_attrs import like_num
 
 
 def test_es_tokenizer_handles_long_text(es_tokenizer):
@@ -33,3 +34,32 @@ en Montevideo y que pregona las bondades de la vida austera."""
 def test_es_tokenizer_handles_cnts(es_tokenizer, text, length):
     tokens = es_tokenizer(text)
     assert len(tokens) == length
+
+
+@pytest.mark.parametrize(
+    "text,match",
+    [
+        ("10", True),
+        ("1", True),
+        ("10.000", True),  # digits with an embedded "."
+        ("1000", True),
+        ("999,0", True),  # digits with an embedded ","
+        ("uno", True),
+        ("dos", True),
+        ("billón", True),
+        ("veintiséis", True),  # compound "veinti-" number word
+        ("perro", False),
+        (",", False),  # punctuation on its own is not number-like
+        ("1/2", True),  # fraction written with "/"
+    ],
+)
+def test_lex_attrs_like_number(es_tokenizer, text, match):
+    tokens = es_tokenizer(text)
+    assert len(tokens) == 1  # every case must stay a single token
+    assert tokens[0].like_num == match  # flag must equal the expected value
+
+
+@pytest.mark.parametrize("word", ["once"])
+def test_es_lex_attrs_capitals(word):
+    assert like_num(word)  # lowercase number word is number-like
+    assert like_num(word.upper())  # and so is its upper-cased form