Auto-format code with black (#11427)

Co-authored-by: explosion-bot <explosion-bot@users.noreply.github.com>
This commit is contained in:
github-actions[bot] 2022-09-02 11:43:20 +02:00 committed by GitHub
parent d1760ebe02
commit 71884d0942
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 67 additions and 16 deletions

View File

@ -6,7 +6,7 @@ from .lex_attrs import LEX_ATTRS
class LatinDefaults(BaseDefaults): class LatinDefaults(BaseDefaults):
tokenizer_exceptions = TOKENIZER_EXCEPTIONS tokenizer_exceptions = TOKENIZER_EXCEPTIONS
stop_words = STOP_WORDS stop_words = STOP_WORDS
lex_attr_getters = LEX_ATTRS lex_attr_getters = LEX_ATTRS

View File

@ -2,7 +2,9 @@ from ...attrs import LIKE_NUM
import re import re
# cf. Goyvaerts/Levithan 2009; case-insensitive, allow 4 # cf. Goyvaerts/Levithan 2009; case-insensitive, allow 4
roman_numerals_compile = re.compile(r'(?i)^(?=[MDCLXVI])M*(C[MD]|D?C{0,4})(X[CL]|L?X{0,4})(I[XV]|V?I{0,4})$') roman_numerals_compile = re.compile(
r"(?i)^(?=[MDCLXVI])M*(C[MD]|D?C{0,4})(X[CL]|L?X{0,4})(I[XV]|V?I{0,4})$"
)
_num_words = set( _num_words = set(
""" """

View File

@ -9,21 +9,67 @@ _exc = {
"tecum": [{ORTH: "te"}, {ORTH: "cum"}], "tecum": [{ORTH: "te"}, {ORTH: "cum"}],
"nobiscum": [{ORTH: "nobis"}, {ORTH: "cum"}], "nobiscum": [{ORTH: "nobis"}, {ORTH: "cum"}],
"vobiscum": [{ORTH: "vobis"}, {ORTH: "cum"}], "vobiscum": [{ORTH: "vobis"}, {ORTH: "cum"}],
"uobiscum": [{ORTH: "uobis"}, {ORTH: "cum"}], "uobiscum": [{ORTH: "uobis"}, {ORTH: "cum"}],
} }
for orth in [ for orth in [
"A.",
'A.', 'Agr.', 'Ap.', 'C.', 'Cn.', 'D.', 'F.', 'K.', 'L.', "M'.", 'M.', 'Mam.', 'N.', 'Oct.', "Agr.",
'Opet.', 'P.', 'Paul.', 'Post.', 'Pro.', 'Q.', 'S.', 'Ser.', 'Sert.', 'Sex.', 'St.', 'Sta.', "Ap.",
'T.', 'Ti.', 'V.', 'Vol.', 'Vop.', 'U.', 'Uol.', 'Uop.', "C.",
"Cn.",
'Ian.', 'Febr.', 'Mart.', 'Apr.', 'Mai.', 'Iun.', 'Iul.', 'Aug.', 'Sept.', 'Oct.', 'Nov.', 'Nou.', "D.",
'Dec.', "F.",
"K.",
'Non.', 'Id.', 'A.D.', "L.",
"M'.",
'Coll.', 'Cos.', 'Ord.', 'Pl.', 'S.C.', 'Suff.', 'Trib.', "M.",
"Mam.",
"N.",
"Oct.",
"Opet.",
"P.",
"Paul.",
"Post.",
"Pro.",
"Q.",
"S.",
"Ser.",
"Sert.",
"Sex.",
"St.",
"Sta.",
"T.",
"Ti.",
"V.",
"Vol.",
"Vop.",
"U.",
"Uol.",
"Uop.",
"Ian.",
"Febr.",
"Mart.",
"Apr.",
"Mai.",
"Iun.",
"Iul.",
"Aug.",
"Sept.",
"Oct.",
"Nov.",
"Nou.",
"Dec.",
"Non.",
"Id.",
"A.D.",
"Coll.",
"Cos.",
"Ord.",
"Pl.",
"S.C.",
"Suff.",
"Trib.",
]: ]:
_exc[orth] = [{ORTH: orth}] _exc[orth] = [{ORTH: orth}]

View File

@ -258,7 +258,7 @@ def ko_tokenizer_tokenizer():
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
def la_tokenizer(): def la_tokenizer():
return get_lang_class("la")().tokenizer return get_lang_class("la")().tokenizer
@pytest.fixture(scope="session") @pytest.fixture(scope="session")

View File

@ -1,5 +1,6 @@
import pytest import pytest
def test_la_tokenizer_handles_exc_in_text(la_tokenizer): def test_la_tokenizer_handles_exc_in_text(la_tokenizer):
text = "scio te omnia facturum, ut nobiscum quam primum sis" text = "scio te omnia facturum, ut nobiscum quam primum sis"
tokens = la_tokenizer(text) tokens = la_tokenizer(text)

View File

@ -1,6 +1,7 @@
import pytest import pytest
from spacy.lang.la.lex_attrs import like_num from spacy.lang.la.lex_attrs import like_num
@pytest.mark.parametrize( @pytest.mark.parametrize(
"text,match", "text,match",
[ [
@ -13,7 +14,7 @@ from spacy.lang.la.lex_attrs import like_num
("ix", True), ("ix", True),
("MMXXII", True), ("MMXXII", True),
("0", True), ("0", True),
("1", True), ("1", True),
("quattuor", True), ("quattuor", True),
("decem", True), ("decem", True),
("tertius", True), ("tertius", True),
@ -27,6 +28,7 @@ def test_lex_attrs_like_number(la_tokenizer, text, match):
assert len(tokens) == 1 assert len(tokens) == 1
assert tokens[0].like_num == match assert tokens[0].like_num == match
@pytest.mark.parametrize("word", ["quinque"]) @pytest.mark.parametrize("word", ["quinque"])
def test_la_lex_attrs_capitals(word): def test_la_lex_attrs_capitals(word):
assert like_num(word) assert like_num(word)