mirror of
https://github.com/explosion/spaCy.git
synced 2025-10-24 04:31:17 +03:00
* Add ancient Greek language support Initial commit * Contributor Agreement * grc tokenizer test added and files formatted with black, unnecessary import removed Co-Authored-By: Sofie Van Landeghem <svlandeg@users.noreply.github.com> * Commas in lists fixed. __init__py added to test * Update lex_attrs.py * Update stop_words.py * Update stop_words.py Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
24 lines
543 B
Python
24 lines
543 B
Python
import pytest
|
||
|
||
|
||
# (text, expected like_num) pairs fed to the parametrized test below.
_LIKE_NUM_CASES = [
    # Inputs expected to be flagged as number-like.
    ("ι", True),
    ("α", True),
    ("ϟα", True),
    ("ἑκατόν", True),
    ("ἐνακόσια", True),
    ("δισχίλια", True),
    ("μύρια", True),
    ("εἷς", True),
    # Inputs expected NOT to be flagged as number-like.
    ("λόγος", False),
    (",", False),
    # One further input expected to be flagged as number-like.
    ("λβ", True),
]


@pytest.mark.parametrize("text,match", _LIKE_NUM_CASES)
def test_lex_attrs_like_number(grc_tokenizer, text, match):
    """Check that ``like_num`` on the single resulting token equals ``match``.

    Each ``text`` is run through the ``grc_tokenizer`` fixture; the test
    requires that exactly one token comes back, then compares that token's
    ``like_num`` attribute against the expected boolean.
    """
    doc = grc_tokenizer(text)
    # Every case must tokenize to exactly one token.
    assert len(doc) == 1
    assert doc[0].like_num == match
|