diff --git a/spacy/lang/ru/lemmatizer.py b/spacy/lang/ru/lemmatizer.py
index 5a49a4e00..399cd174c 100644
--- a/spacy/lang/ru/lemmatizer.py
+++ b/spacy/lang/ru/lemmatizer.py
@@ -12,7 +12,6 @@ PUNCT_RULES = {"«": '"', "»": '"'}
 
 
 class RussianLemmatizer(Lemmatizer):
-
     def __init__(
         self,
         vocab: Vocab,
diff --git a/spacy/pipeline/spancat.py b/spacy/pipeline/spancat.py
index 162532b05..8d1be06c3 100644
--- a/spacy/pipeline/spancat.py
+++ b/spacy/pipeline/spancat.py
@@ -79,11 +79,13 @@ def build_ngram_suggester(sizes: List[int]) -> Callable[[List[Doc]], Ragged]:
 
 
 @registry.misc("spacy.ngram_range_suggester.v1")
-def build_ngram_range_suggester(min_size: int, max_size: int) -> Callable[[List[Doc]], Ragged]:
+def build_ngram_range_suggester(
+    min_size: int, max_size: int
+) -> Callable[[List[Doc]], Ragged]:
     """Suggest all spans of the given lengths between a given min and max value
     - both inclusive. Spans are returned as a ragged array of integers. The array
     has two columns, indicating the start and end position."""
-    sizes = range(min_size, max_size+1)
+    sizes = range(min_size, max_size + 1)
     return build_ngram_suggester(sizes)
 
 
diff --git a/spacy/tests/conftest.py b/spacy/tests/conftest.py
index 4ba82c69a..a5dedcc87 100644
--- a/spacy/tests/conftest.py
+++ b/spacy/tests/conftest.py
@@ -123,7 +123,7 @@ def fr_tokenizer():
 @pytest.fixture(scope="session")
 def ga_tokenizer():
     return get_lang_class("ga")().tokenizer
-    
+
 
 @pytest.fixture(scope="session")
 def grc_tokenizer():
diff --git a/spacy/tests/pipeline/test_spancat.py b/spacy/tests/pipeline/test_spancat.py
index 98f499065..93fbc5969 100644
--- a/spacy/tests/pipeline/test_spancat.py
+++ b/spacy/tests/pipeline/test_spancat.py
@@ -188,7 +188,9 @@ def test_ngram_suggester(en_tokenizer):
 def test_ngram_sizes(en_tokenizer):
     # test that the range suggester works well
     size_suggester = registry.misc.get("spacy.ngram_suggester.v1")(sizes=[1, 2, 3])
-    range_suggester = registry.misc.get("spacy.ngram_range_suggester.v1")(min_size=1, max_size=3)
+    range_suggester = registry.misc.get("spacy.ngram_range_suggester.v1")(
+        min_size=1, max_size=3
+    )
     docs = [
         en_tokenizer(text) for text in ["a", "a b", "a b c", "a b c d", "a b c d e"]
     ]
@@ -199,6 +201,8 @@ def test_ngram_sizes(en_tokenizer):
     assert_equal(ngrams_1.data, ngrams_2.data)
 
     # one more variation
-    range_suggester = registry.misc.get("spacy.ngram_range_suggester.v1")(min_size=2, max_size=4)
+    range_suggester = registry.misc.get("spacy.ngram_range_suggester.v1")(
+        min_size=2, max_size=4
+    )
     ngrams_3 = range_suggester(docs)
     assert_equal(ngrams_3.lengths, [0, 1, 3, 6, 9])
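
For context, the expected lengths asserted in test_ngram_sizes follow directly from the docstring above: a doc with n tokens contributes max(0, n - k + 1) spans for each size k between min_size and max_size, both inclusive. The sketch below is not part of the patch; it assumes a spaCy build that includes this change, substitutes spacy.blank("en") for the en_tokenizer test fixture, and uses illustrative variable names. It recomputes the [0, 1, 3, 6, 9] assertion by hand:

# Sketch only, not part of the patch: assumes a spaCy install containing the
# spacy.ngram_range_suggester.v1 change above; nlp/docs/expected are
# illustrative names, and spacy.blank("en") stands in for the en_tokenizer
# fixture used by the tests.
import spacy
from spacy.util import registry

nlp = spacy.blank("en")
docs = [nlp(text) for text in ["a", "a b", "a b c", "a b c d", "a b c d e"]]

# min_size=2, max_size=4, both inclusive, i.e. sizes = range(2, 5)
suggester = registry.misc.get("spacy.ngram_range_suggester.v1")(min_size=2, max_size=4)
ngrams = suggester(docs)

# A doc with n tokens yields max(0, n - k + 1) spans of each size k, so the
# per-doc totals for n = 1..5 are 0, 1, 3, 6, 9 -- matching the test.
expected = [sum(max(0, len(doc) - k + 1) for k in range(2, 5)) for doc in docs]
assert list(ngrams.lengths) == expected == [0, 1, 3, 6, 9]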