Auto-format [ci skip]

This commit is contained in:
Ines Montani 2020-10-05 21:58:18 +02:00
parent 1a554bdcb1
commit 126268ce50
6 changed files with 18 additions and 14 deletions

View File

@@ -26,7 +26,9 @@ class Ukrainian(Language):
default_config={"model": None, "mode": "pymorphy2"}, default_config={"model": None, "mode": "pymorphy2"},
default_score_weights={"lemma_acc": 1.0}, default_score_weights={"lemma_acc": 1.0},
) )
def make_lemmatizer(nlp: Language, model: Optional[Model], name: str, mode: str, overwrite: bool = False,): def make_lemmatizer(
nlp: Language, model: Optional[Model], name: str, mode: str, overwrite: bool = False
):
return UkrainianLemmatizer(nlp.vocab, model, name, mode=mode, overwrite=overwrite) return UkrainianLemmatizer(nlp.vocab, model, name, mode=mode, overwrite=overwrite)

View File

@@ -54,9 +54,7 @@ def create_chinese_tokenizer(segmenter: Segmenter = Segmenter.char):
class ChineseTokenizer(DummyTokenizer): class ChineseTokenizer(DummyTokenizer):
def __init__( def __init__(self, nlp: Language, segmenter: Segmenter = Segmenter.char):
self, nlp: Language, segmenter: Segmenter = Segmenter.char,
):
self.vocab = nlp.vocab self.vocab = nlp.vocab
if isinstance(segmenter, Segmenter): if isinstance(segmenter, Segmenter):
segmenter = segmenter.value segmenter = segmenter.value
@@ -87,7 +85,7 @@ class ChineseTokenizer(DummyTokenizer):
if pkuseg_user_dict is None: if pkuseg_user_dict is None:
pkuseg_user_dict = pkuseg_model pkuseg_user_dict = pkuseg_model
self.pkuseg_seg = try_pkuseg_import( self.pkuseg_seg = try_pkuseg_import(
pkuseg_model=pkuseg_model, pkuseg_user_dict=pkuseg_user_dict, pkuseg_model=pkuseg_model, pkuseg_user_dict=pkuseg_user_dict
) )
def __call__(self, text: str) -> Doc: def __call__(self, text: str) -> Doc:

View File

@@ -209,9 +209,13 @@ def test_doc_retokenizer_split_norm(en_vocab):
# Retokenize to split out the words in the token at doc[2]. # Retokenize to split out the words in the token at doc[2].
token = doc[2] token = doc[2]
with doc.retokenize() as retokenizer: with doc.retokenize() as retokenizer:
retokenizer.split(token, ["brown", "fox", "jumps", "over", "the"], heads=[(token, idx) for idx in range(5)]) retokenizer.split(
token,
["brown", "fox", "jumps", "over", "the"],
heads=[(token, idx) for idx in range(5)],
)
assert doc[9].text == "w/" assert doc[9].text == "w/"
assert doc[9].norm_ == "with" assert doc[9].norm_ == "with"
assert doc[5].text == "over" assert doc[5].text == "over"
assert doc[5].norm_ == "over" assert doc[5].norm_ == "over"

View File

@@ -350,7 +350,7 @@ def test_pipe_methods_frozen():
@pytest.mark.parametrize( @pytest.mark.parametrize(
"pipe", ["tagger", "parser", "ner", "textcat", "morphologizer"], "pipe", ["tagger", "parser", "ner", "textcat", "morphologizer"]
) )
def test_pipe_label_data_exports_labels(pipe): def test_pipe_label_data_exports_labels(pipe):
nlp = Language() nlp = Language()

View File

@@ -64,7 +64,7 @@ def get_tok2vec_kwargs():
width=32, width=32,
rows=[500, 500, 500], rows=[500, 500, 500],
attrs=["NORM", "PREFIX", "SHAPE"], attrs=["NORM", "PREFIX", "SHAPE"],
include_static_vectors=False include_static_vectors=False,
), ),
"encode": MaxoutWindowEncoder( "encode": MaxoutWindowEncoder(
width=32, depth=2, maxout_pieces=2, window_size=1 width=32, depth=2, maxout_pieces=2, window_size=1
@@ -81,7 +81,7 @@ def test_multi_hash_embed():
width=32, width=32,
rows=[500, 500, 500], rows=[500, 500, 500],
attrs=["NORM", "PREFIX", "SHAPE"], attrs=["NORM", "PREFIX", "SHAPE"],
include_static_vectors=False include_static_vectors=False,
) )
hash_embeds = [node for node in embed.walk() if node.name == "hashembed"] hash_embeds = [node for node in embed.walk() if node.name == "hashembed"]
assert len(hash_embeds) == 3 assert len(hash_embeds) == 3
@@ -96,11 +96,11 @@ def test_multi_hash_embed():
width=32, width=32,
rows=[1000, 50, 250], rows=[1000, 50, 250],
attrs=["NORM", "PREFIX", "SHAPE"], attrs=["NORM", "PREFIX", "SHAPE"],
include_static_vectors=False include_static_vectors=False,
) )
hash_embeds = [node for node in embed.walk() if node.name == "hashembed"] hash_embeds = [node for node in embed.walk() if node.name == "hashembed"]
assert [he.get_dim("nV") for he in hash_embeds] == [1000, 50, 250] assert [he.get_dim("nV") for he in hash_embeds] == [1000, 50, 250]
@pytest.mark.parametrize( @pytest.mark.parametrize(
"seed,model_func,kwargs", "seed,model_func,kwargs",

View File

@@ -64,7 +64,7 @@ def dont_augment(nlp: "Language", example: Example) -> Iterator[Example]:
def lower_casing_augmenter( def lower_casing_augmenter(
nlp: "Language", example: Example, *, level: float, nlp: "Language", example: Example, *, level: float
) -> Iterator[Example]: ) -> Iterator[Example]:
if random.random() >= level: if random.random() >= level:
yield example yield example