Auto-format code with black

This commit is contained in:
explosion-bot 2021-07-16 08:03:36 +00:00 committed by GitHub
parent f5acc48111
commit eff3d1088b
4 changed files with 11 additions and 6 deletions

View File

@@ -12,7 +12,6 @@ PUNCT_RULES = {"«": '"', "»": '"'}
 class RussianLemmatizer(Lemmatizer):
     def __init__(
         self,
         vocab: Vocab,

View File

@@ -79,11 +79,13 @@ def build_ngram_suggester(sizes: List[int]) -> Callable[[List[Doc]], Ragged]:
 @registry.misc("spacy.ngram_range_suggester.v1")
-def build_ngram_range_suggester(min_size: int, max_size: int) -> Callable[[List[Doc]], Ragged]:
+def build_ngram_range_suggester(
+    min_size: int, max_size: int
+) -> Callable[[List[Doc]], Ragged]:
     """Suggest all spans of the given lengths between a given min and max value - both inclusive.
     Spans are returned as a ragged array of integers. The array has two columns,
     indicating the start and end position."""
-    sizes = range(min_size, max_size+1)
+    sizes = range(min_size, max_size + 1)
     return build_ngram_suggester(sizes)

View File

@@ -188,7 +188,9 @@ def test_ngram_suggester(en_tokenizer):
 def test_ngram_sizes(en_tokenizer):
     # test that the range suggester works well
     size_suggester = registry.misc.get("spacy.ngram_suggester.v1")(sizes=[1, 2, 3])
-    range_suggester = registry.misc.get("spacy.ngram_range_suggester.v1")(min_size=1, max_size=3)
+    range_suggester = registry.misc.get("spacy.ngram_range_suggester.v1")(
+        min_size=1, max_size=3
+    )
     docs = [
         en_tokenizer(text) for text in ["a", "a b", "a b c", "a b c d", "a b c d e"]
     ]
@@ -199,6 +201,8 @@ def test_ngram_sizes(en_tokenizer):
     assert_equal(ngrams_1.data, ngrams_2.data)
     # one more variation
-    range_suggester = registry.misc.get("spacy.ngram_range_suggester.v1")(min_size=2, max_size=4)
+    range_suggester = registry.misc.get("spacy.ngram_range_suggester.v1")(
+        min_size=2, max_size=4
+    )
     ngrams_3 = range_suggester(docs)
     assert_equal(ngrams_3.lengths, [0, 1, 3, 6, 9])