mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-11 04:08:09 +03:00
fe5f5d6ac6
* Update Makefile For more recent python version * updated for bsc changes New tokenization changes * Update test_text.py * updating tests and requirements * changed failed test in test/lang/ca changed failed test in test/lang/ca * Update .gitignore deleted stashed changes line * back to python 3.6 and remove transformer requirements As per request * Update test_exception.py Change the test * Update test_exception.py Remove test print * Update Makefile For more recent python version * updated for bsc changes New tokenization changes * updating tests and requirements * Update requirements.txt Removed spacy-transfromers from requirements * Update test_exception.py Added final punctuation to ensure consistency * Update Makefile Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com> * Format * Update test to check all tokens Co-authored-by: cayorodriguez <crodriguezp@gmail.com> Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
29 lines
621 B
Python
29 lines
621 B
Python
import pytest
|
|
|
|
|
|
@pytest.mark.parametrize(
    "text,lemma",
    [
        ("aprox.", "aproximadament"),
        ("pàg.", "pàgina"),
        ("p.ex.", "per exemple"),
    ],
)
def test_ca_tokenizer_handles_abbr(ca_tokenizer, text, lemma):
    """Check that a Catalan abbreviation is tokenized as exactly one token.

    Each parametrized case pairs an abbreviation (``text``) with its
    expanded form (``lemma``).
    """
    # NOTE: `lemma` is accepted only because the parametrization supplies it;
    # this test asserts on the token count alone.
    doc = ca_tokenizer(text)
    token_count = len(doc)
    assert token_count == 1
|
|
|
|
|
|
def test_ca_tokenizer_handles_exc_in_text(ca_tokenizer):
    """Check the full token sequence for a sentence containing abbreviations.

    The expected output keeps "Dra." and "pl." as single tokens, splits
    "dels" into "d" + "els", and separates the sentence-final period.
    """
    sentence = "La Dra. Puig viu a la pl. dels Til·lers."
    expected_tokens = [
        "La",
        "Dra.",
        "Puig",
        "viu",
        "a",
        "la",
        "pl.",
        "d",
        "els",
        "Til·lers",
        ".",
    ]
    # Compare every token, not just the count, so any segmentation change
    # in the sentence is caught.
    observed_tokens = [token.text for token in ca_tokenizer(sentence)]
    assert observed_tokens == expected_tokens
|