Auto-format code with black

This commit is contained in:
explosion-bot 2021-07-16 08:03:36 +00:00 committed by GitHub
parent f5acc48111
commit eff3d1088b
4 changed files with 11 additions and 6 deletions

View File

@@ -12,7 +12,6 @@ PUNCT_RULES = {"«": '"', "»": '"'}
class RussianLemmatizer(Lemmatizer):
def __init__(
self,
vocab: Vocab,

View File

@@ -79,7 +79,9 @@ def build_ngram_suggester(sizes: List[int]) -> Callable[[List[Doc]], Ragged]:
@registry.misc("spacy.ngram_range_suggester.v1")
def build_ngram_range_suggester(min_size: int, max_size: int) -> Callable[[List[Doc]], Ragged]:
def build_ngram_range_suggester(
min_size: int, max_size: int
) -> Callable[[List[Doc]], Ragged]:
"""Suggest all spans of the given lengths between a given min and max value - both inclusive.
Spans are returned as a ragged array of integers. The array has two columns,
indicating the start and end position."""

View File

@@ -188,7 +188,9 @@ def test_ngram_suggester(en_tokenizer):
def test_ngram_sizes(en_tokenizer):
# test that the range suggester works well
size_suggester = registry.misc.get("spacy.ngram_suggester.v1")(sizes=[1, 2, 3])
range_suggester = registry.misc.get("spacy.ngram_range_suggester.v1")(min_size=1, max_size=3)
range_suggester = registry.misc.get("spacy.ngram_range_suggester.v1")(
min_size=1, max_size=3
)
docs = [
en_tokenizer(text) for text in ["a", "a b", "a b c", "a b c d", "a b c d e"]
]
@@ -199,6 +201,8 @@ def test_ngram_sizes(en_tokenizer):
assert_equal(ngrams_1.data, ngrams_2.data)
# one more variation
range_suggester = registry.misc.get("spacy.ngram_range_suggester.v1")(min_size=2, max_size=4)
range_suggester = registry.misc.get("spacy.ngram_range_suggester.v1")(
min_size=2, max_size=4
)
ngrams_3 = range_suggester(docs)
assert_equal(ngrams_3.lengths, [0, 1, 3, 6, 9])