Mirror of https://github.com/explosion/spaCy.git, synced 2024-12-25 01:16:28 +03:00.
Spanish like num improvement (#5381)
* Add tests for Spanish like_num. * Add missing numbers in Spanish lexical attributes for like_num. * Modify Spanish test function name. * Add contributor agreement.
This commit is contained in:
parent
8602daba85
commit
148b036e0c
|
@ -26,6 +26,15 @@ _num_words = [
|
||||||
"dieciocho",
|
"dieciocho",
|
||||||
"diecinueve",
|
"diecinueve",
|
||||||
"veinte",
|
"veinte",
|
||||||
|
"veintiuno",
|
||||||
|
"veintidós",
|
||||||
|
"veintitrés",
|
||||||
|
"veinticuatro",
|
||||||
|
"veinticinco",
|
||||||
|
"veintiséis",
|
||||||
|
"veintisiete",
|
||||||
|
"veintiocho",
|
||||||
|
"veintinueve",
|
||||||
"treinta",
|
"treinta",
|
||||||
"cuarenta",
|
"cuarenta",
|
||||||
"cincuenta",
|
"cincuenta",
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
from spacy.lang.es.lex_attrs import like_num
|
||||||
|
|
||||||
|
|
||||||
def test_es_tokenizer_handles_long_text(es_tokenizer):
|
def test_es_tokenizer_handles_long_text(es_tokenizer):
|
||||||
|
@ -33,3 +34,32 @@ en Montevideo y que pregona las bondades de la vida austera."""
|
||||||
def test_es_tokenizer_handles_cnts(es_tokenizer, text, length):
    """Tokenizing *text* with the Spanish tokenizer yields *length* tokens."""
    doc = es_tokenizer(text)
    token_count = len(doc)
    assert token_count == length
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
    "text,match",
    [
        # numeric strings (including Spanish thousand/decimal separators)
        ("10", True), ("1", True), ("10.000", True), ("1000", True),
        ("999,0", True), ("1/2", True),
        # spelled-out Spanish number words
        ("uno", True), ("dos", True), ("billón", True), ("veintiséis", True),
        # non-numbers
        ("perro", False), (",", False),
    ],
)
def test_lex_attrs_like_number(es_tokenizer, text, match):
    """Spanish ``like_num`` flags digit strings and number words correctly."""
    doc = es_tokenizer(text)
    assert len(doc) == 1
    token = doc[0]
    assert token.like_num == match
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("word", ["once"])
def test_es_lex_attrs_capitals(word):
    """``like_num`` recognizes Spanish number words regardless of case."""
    for variant in (word, word.upper()):
        assert like_num(variant)
|
Loading…
Reference in New Issue
Block a user