mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 18:26:30 +03:00
Auto-format [ci skip]
This commit is contained in:
parent
1a554bdcb1
commit
126268ce50
|
@ -26,7 +26,9 @@ class Ukrainian(Language):
|
|||
default_config={"model": None, "mode": "pymorphy2"},
|
||||
default_score_weights={"lemma_acc": 1.0},
|
||||
)
|
||||
def make_lemmatizer(nlp: Language, model: Optional[Model], name: str, mode: str, overwrite: bool = False,):
|
||||
def make_lemmatizer(
|
||||
nlp: Language, model: Optional[Model], name: str, mode: str, overwrite: bool = False
|
||||
):
|
||||
return UkrainianLemmatizer(nlp.vocab, model, name, mode=mode, overwrite=overwrite)
|
||||
|
||||
|
||||
|
|
|
@ -54,9 +54,7 @@ def create_chinese_tokenizer(segmenter: Segmenter = Segmenter.char):
|
|||
|
||||
|
||||
class ChineseTokenizer(DummyTokenizer):
|
||||
def __init__(
|
||||
self, nlp: Language, segmenter: Segmenter = Segmenter.char,
|
||||
):
|
||||
def __init__(self, nlp: Language, segmenter: Segmenter = Segmenter.char):
|
||||
self.vocab = nlp.vocab
|
||||
if isinstance(segmenter, Segmenter):
|
||||
segmenter = segmenter.value
|
||||
|
@ -87,7 +85,7 @@ class ChineseTokenizer(DummyTokenizer):
|
|||
if pkuseg_user_dict is None:
|
||||
pkuseg_user_dict = pkuseg_model
|
||||
self.pkuseg_seg = try_pkuseg_import(
|
||||
pkuseg_model=pkuseg_model, pkuseg_user_dict=pkuseg_user_dict,
|
||||
pkuseg_model=pkuseg_model, pkuseg_user_dict=pkuseg_user_dict
|
||||
)
|
||||
|
||||
def __call__(self, text: str) -> Doc:
|
||||
|
|
|
@ -209,7 +209,11 @@ def test_doc_retokenizer_split_norm(en_vocab):
|
|||
# Retokenize to split out the words in the token at doc[2].
|
||||
token = doc[2]
|
||||
with doc.retokenize() as retokenizer:
|
||||
retokenizer.split(token, ["brown", "fox", "jumps", "over", "the"], heads=[(token, idx) for idx in range(5)])
|
||||
retokenizer.split(
|
||||
token,
|
||||
["brown", "fox", "jumps", "over", "the"],
|
||||
heads=[(token, idx) for idx in range(5)],
|
||||
)
|
||||
|
||||
assert doc[9].text == "w/"
|
||||
assert doc[9].norm_ == "with"
|
||||
|
|
|
@ -350,7 +350,7 @@ def test_pipe_methods_frozen():
|
|||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"pipe", ["tagger", "parser", "ner", "textcat", "morphologizer"],
|
||||
"pipe", ["tagger", "parser", "ner", "textcat", "morphologizer"]
|
||||
)
|
||||
def test_pipe_label_data_exports_labels(pipe):
|
||||
nlp = Language()
|
||||
|
|
|
@ -64,7 +64,7 @@ def get_tok2vec_kwargs():
|
|||
width=32,
|
||||
rows=[500, 500, 500],
|
||||
attrs=["NORM", "PREFIX", "SHAPE"],
|
||||
include_static_vectors=False
|
||||
include_static_vectors=False,
|
||||
),
|
||||
"encode": MaxoutWindowEncoder(
|
||||
width=32, depth=2, maxout_pieces=2, window_size=1
|
||||
|
@ -81,7 +81,7 @@ def test_multi_hash_embed():
|
|||
width=32,
|
||||
rows=[500, 500, 500],
|
||||
attrs=["NORM", "PREFIX", "SHAPE"],
|
||||
include_static_vectors=False
|
||||
include_static_vectors=False,
|
||||
)
|
||||
hash_embeds = [node for node in embed.walk() if node.name == "hashembed"]
|
||||
assert len(hash_embeds) == 3
|
||||
|
@ -96,7 +96,7 @@ def test_multi_hash_embed():
|
|||
width=32,
|
||||
rows=[1000, 50, 250],
|
||||
attrs=["NORM", "PREFIX", "SHAPE"],
|
||||
include_static_vectors=False
|
||||
include_static_vectors=False,
|
||||
)
|
||||
hash_embeds = [node for node in embed.walk() if node.name == "hashembed"]
|
||||
assert [he.get_dim("nV") for he in hash_embeds] == [1000, 50, 250]
|
||||
|
|
|
@ -64,7 +64,7 @@ def dont_augment(nlp: "Language", example: Example) -> Iterator[Example]:
|
|||
|
||||
|
||||
def lower_casing_augmenter(
|
||||
nlp: "Language", example: Example, *, level: float,
|
||||
nlp: "Language", example: Example, *, level: float
|
||||
) -> Iterator[Example]:
|
||||
if random.random() >= level:
|
||||
yield example
|
||||
|
|
Loading…
Reference in New Issue
Block a user