From 71884d0942c9b45f0ce5408496aec1aff2f0a4b7 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 2 Sep 2022 11:43:20 +0200 Subject: [PATCH] Auto-format code with black (#11427) Co-authored-by: explosion-bot --- spacy/lang/la/__init__.py | 2 +- spacy/lang/la/lex_attrs.py | 4 +- spacy/lang/la/tokenizer_exceptions.py | 70 ++++++++++++++++++++++----- spacy/tests/conftest.py | 2 +- spacy/tests/lang/la/test_exception.py | 1 + spacy/tests/lang/la/test_text.py | 4 +- 6 files changed, 67 insertions(+), 16 deletions(-) diff --git a/spacy/lang/la/__init__.py b/spacy/lang/la/__init__.py index 5f2cccee3..15b87c5b9 100644 --- a/spacy/lang/la/__init__.py +++ b/spacy/lang/la/__init__.py @@ -6,7 +6,7 @@ from .lex_attrs import LEX_ATTRS class LatinDefaults(BaseDefaults): tokenizer_exceptions = TOKENIZER_EXCEPTIONS - stop_words = STOP_WORDS + stop_words = STOP_WORDS lex_attr_getters = LEX_ATTRS diff --git a/spacy/lang/la/lex_attrs.py b/spacy/lang/la/lex_attrs.py index 9348a811a..9efb4dd3c 100644 --- a/spacy/lang/la/lex_attrs.py +++ b/spacy/lang/la/lex_attrs.py @@ -2,7 +2,9 @@ from ...attrs import LIKE_NUM import re # cf. Goyvaerts/Levithan 2009; case-insensitive, allow 4 -roman_numerals_compile = re.compile(r'(?i)^(?=[MDCLXVI])M*(C[MD]|D?C{0,4})(X[CL]|L?X{0,4})(I[XV]|V?I{0,4})$') +roman_numerals_compile = re.compile( + r"(?i)^(?=[MDCLXVI])M*(C[MD]|D?C{0,4})(X[CL]|L?X{0,4})(I[XV]|V?I{0,4})$" +) _num_words = set( """ diff --git a/spacy/lang/la/tokenizer_exceptions.py b/spacy/lang/la/tokenizer_exceptions.py index 905304188..060f6e085 100644 --- a/spacy/lang/la/tokenizer_exceptions.py +++ b/spacy/lang/la/tokenizer_exceptions.py @@ -9,21 +9,67 @@ _exc = { "tecum": [{ORTH: "te"}, {ORTH: "cum"}], "nobiscum": [{ORTH: "nobis"}, {ORTH: "cum"}], "vobiscum": [{ORTH: "vobis"}, {ORTH: "cum"}], - "uobiscum": [{ORTH: "uobis"}, {ORTH: "cum"}], + "uobiscum": [{ORTH: "uobis"}, {ORTH: "cum"}], } for orth in [ - - 'A.', 'Agr.', 'Ap.', 'C.', 'Cn.', 'D.', 'F.', 'K.', 'L.', "M'.", 'M.', 'Mam.', 'N.', 'Oct.', - 'Opet.', 'P.', 'Paul.', 'Post.', 'Pro.', 'Q.', 'S.', 'Ser.', 'Sert.', 'Sex.', 'St.', 'Sta.', - 'T.', 'Ti.', 'V.', 'Vol.', 'Vop.', 'U.', 'Uol.', 'Uop.', - - 'Ian.', 'Febr.', 'Mart.', 'Apr.', 'Mai.', 'Iun.', 'Iul.', 'Aug.', 'Sept.', 'Oct.', 'Nov.', 'Nou.', - 'Dec.', - - 'Non.', 'Id.', 'A.D.', - - 'Coll.', 'Cos.', 'Ord.', 'Pl.', 'S.C.', 'Suff.', 'Trib.', + "A.", + "Agr.", + "Ap.", + "C.", + "Cn.", + "D.", + "F.", + "K.", + "L.", + "M'.", + "M.", + "Mam.", + "N.", + "Oct.", + "Opet.", + "P.", + "Paul.", + "Post.", + "Pro.", + "Q.", + "S.", + "Ser.", + "Sert.", + "Sex.", + "St.", + "Sta.", + "T.", + "Ti.", + "V.", + "Vol.", + "Vop.", + "U.", + "Uol.", + "Uop.", + "Ian.", + "Febr.", + "Mart.", + "Apr.", + "Mai.", + "Iun.", + "Iul.", + "Aug.", + "Sept.", + "Oct.", + "Nov.", + "Nou.", + "Dec.", + "Non.", + "Id.", + "A.D.", + "Coll.", + "Cos.", + "Ord.", + "Pl.", + "S.C.", + "Suff.", + "Trib.", ]: _exc[orth] = [{ORTH: orth}] diff --git a/spacy/tests/conftest.py b/spacy/tests/conftest.py index 0395ba7ca..742bfcc6a 100644 --- a/spacy/tests/conftest.py +++ b/spacy/tests/conftest.py @@ -258,7 +258,7 @@ def ko_tokenizer_tokenizer(): @pytest.fixture(scope="module") def la_tokenizer(): - return get_lang_class("la")().tokenizer + return get_lang_class("la")().tokenizer @pytest.fixture(scope="session") diff --git a/spacy/tests/lang/la/test_exception.py b/spacy/tests/lang/la/test_exception.py index 04bc1d489..966ae22cf 100644 --- a/spacy/tests/lang/la/test_exception.py +++ b/spacy/tests/lang/la/test_exception.py @@ -1,5 +1,6 @@ import pytest + def test_la_tokenizer_handles_exc_in_text(la_tokenizer): text = "scio te omnia facturum, ut nobiscum quam primum sis" tokens = la_tokenizer(text) diff --git a/spacy/tests/lang/la/test_text.py b/spacy/tests/lang/la/test_text.py index 11676b92b..48e7359a4 100644 --- a/spacy/tests/lang/la/test_text.py +++ b/spacy/tests/lang/la/test_text.py @@ -1,6 +1,7 @@ import pytest from spacy.lang.la.lex_attrs import like_num + @pytest.mark.parametrize( "text,match", [ @@ -13,7 +14,7 @@ from spacy.lang.la.lex_attrs import like_num ("ix", True), ("MMXXII", True), ("0", True), - ("1", True), + ("1", True), ("quattuor", True), ("decem", True), ("tertius", True), @@ -27,6 +28,7 @@ def test_lex_attrs_like_number(la_tokenizer, text, match): assert len(tokens) == 1 assert tokens[0].like_num == match + @pytest.mark.parametrize("word", ["quinque"]) def test_la_lex_attrs_capitals(word): assert like_num(word)