mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 18:26:30 +03:00
Auto-format code with black
This commit is contained in:
parent
f5acc48111
commit
eff3d1088b
|
@@ -12,7 +12,6 @@ PUNCT_RULES = {"«": '"', "»": '"'}
|
||||||
|
|
||||||
|
|
||||||
class RussianLemmatizer(Lemmatizer):
|
class RussianLemmatizer(Lemmatizer):
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
vocab: Vocab,
|
vocab: Vocab,
|
||||||
|
|
|
@@ -79,11 +79,13 @@ def build_ngram_suggester(sizes: List[int]) -> Callable[[List[Doc]], Ragged]:
|
||||||
|
|
||||||
|
|
||||||
@registry.misc("spacy.ngram_range_suggester.v1")
|
@registry.misc("spacy.ngram_range_suggester.v1")
|
||||||
def build_ngram_range_suggester(min_size: int, max_size: int) -> Callable[[List[Doc]], Ragged]:
|
def build_ngram_range_suggester(
|
||||||
|
min_size: int, max_size: int
|
||||||
|
) -> Callable[[List[Doc]], Ragged]:
|
||||||
"""Suggest all spans of the given lengths between a given min and max value - both inclusive.
|
"""Suggest all spans of the given lengths between a given min and max value - both inclusive.
|
||||||
Spans are returned as a ragged array of integers. The array has two columns,
|
Spans are returned as a ragged array of integers. The array has two columns,
|
||||||
indicating the start and end position."""
|
indicating the start and end position."""
|
||||||
sizes = range(min_size, max_size+1)
|
sizes = range(min_size, max_size + 1)
|
||||||
return build_ngram_suggester(sizes)
|
return build_ngram_suggester(sizes)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@@ -123,7 +123,7 @@ def fr_tokenizer():
|
||||||
@pytest.fixture(scope="session")
|
@pytest.fixture(scope="session")
|
||||||
def ga_tokenizer():
|
def ga_tokenizer():
|
||||||
return get_lang_class("ga")().tokenizer
|
return get_lang_class("ga")().tokenizer
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="session")
|
@pytest.fixture(scope="session")
|
||||||
def grc_tokenizer():
|
def grc_tokenizer():
|
||||||
|
|
|
@@ -188,7 +188,9 @@ def test_ngram_suggester(en_tokenizer):
|
||||||
def test_ngram_sizes(en_tokenizer):
|
def test_ngram_sizes(en_tokenizer):
|
||||||
# test that the range suggester works well
|
# test that the range suggester works well
|
||||||
size_suggester = registry.misc.get("spacy.ngram_suggester.v1")(sizes=[1, 2, 3])
|
size_suggester = registry.misc.get("spacy.ngram_suggester.v1")(sizes=[1, 2, 3])
|
||||||
range_suggester = registry.misc.get("spacy.ngram_range_suggester.v1")(min_size=1, max_size=3)
|
range_suggester = registry.misc.get("spacy.ngram_range_suggester.v1")(
|
||||||
|
min_size=1, max_size=3
|
||||||
|
)
|
||||||
docs = [
|
docs = [
|
||||||
en_tokenizer(text) for text in ["a", "a b", "a b c", "a b c d", "a b c d e"]
|
en_tokenizer(text) for text in ["a", "a b", "a b c", "a b c d", "a b c d e"]
|
||||||
]
|
]
|
||||||
|
@@ -199,6 +201,8 @@ def test_ngram_sizes(en_tokenizer):
|
||||||
assert_equal(ngrams_1.data, ngrams_2.data)
|
assert_equal(ngrams_1.data, ngrams_2.data)
|
||||||
|
|
||||||
# one more variation
|
# one more variation
|
||||||
range_suggester = registry.misc.get("spacy.ngram_range_suggester.v1")(min_size=2, max_size=4)
|
range_suggester = registry.misc.get("spacy.ngram_range_suggester.v1")(
|
||||||
|
min_size=2, max_size=4
|
||||||
|
)
|
||||||
ngrams_3 = range_suggester(docs)
|
ngrams_3 = range_suggester(docs)
|
||||||
assert_equal(ngrams_3.lengths, [0, 1, 3, 6, 9])
|
assert_equal(ngrams_3.lengths, [0, 1, 3, 6, 9])
|
||||||
|
|
Loading…
Reference in New Issue
Block a user