diff --git a/spacy/_ml.py b/spacy/_ml.py index 3db28ced5..2d4064652 100644 --- a/spacy/_ml.py +++ b/spacy/_ml.py @@ -360,7 +360,7 @@ def Tok2Vec(width, embed_size, **kwargs): "config": { "vectors_name": pretrained_vectors, "width": width, - "column": cols.index("ID") + "column": cols.index("ID"), }, } if cnn_maxout_pieces >= 2: diff --git a/spacy/ml/tok2vec.py b/spacy/ml/tok2vec.py index d24b9d0c7..4f3cd458d 100644 --- a/spacy/ml/tok2vec.py +++ b/spacy/ml/tok2vec.py @@ -45,12 +45,15 @@ def MultiHashEmbed(config): norm = HashEmbed(width, rows, column=cols.index("NORM"), name="embed_norm") if config["use_subwords"]: - prefix = HashEmbed(width, rows // 2, - column=cols.index("PREFIX"), name="embed_prefix") - suffix = HashEmbed(width, rows // 2, - column=cols.index("SUFFIX"), name="embed_suffix") - shape = HashEmbed(width, rows // 2, - column=cols.index("SHAPE"), name="embed_shape") + prefix = HashEmbed( + width, rows // 2, column=cols.index("PREFIX"), name="embed_prefix" + ) + suffix = HashEmbed( + width, rows // 2, column=cols.index("SUFFIX"), name="embed_suffix" + ) + shape = HashEmbed( + width, rows // 2, column=cols.index("SHAPE"), name="embed_shape" + ) if config.get("@pretrained_vectors"): glove = make_layer(config["@pretrained_vectors"]) mix = make_layer(config["@mix"]) @@ -60,20 +63,16 @@ def MultiHashEmbed(config): mix._layers[0].nI = width * 5 layer = uniqued( (glove | norm | prefix | suffix | shape) >> mix, - column=cols.index("ORTH") + column=cols.index("ORTH"), ) elif config["use_subwords"]: mix._layers[0].nI = width * 4 layer = uniqued( - (norm | prefix | suffix | shape) >> mix, - column=cols.index("ORTH") + (norm | prefix | suffix | shape) >> mix, column=cols.index("ORTH") ) elif config["@pretrained_vectors"]: mix._layers[0].nI = width * 2 - embed = uniqued( - (glove | norm) >> mix, - column=cols.index("ORTH"), - ) + embed = uniqued((glove | norm) >> mix, column=cols.index("ORTH"),) else: embed = norm layer.cfg = config diff --git a/spacy/tests/pipeline/test_functions.py b/spacy/tests/pipeline/test_functions.py index fbb88ade2..5b5fcd2fd 100644 --- a/spacy/tests/pipeline/test_functions.py +++ b/spacy/tests/pipeline/test_functions.py @@ -21,4 +21,14 @@ def doc(en_tokenizer): def test_merge_subtokens(doc): doc = merge_subtokens(doc) # get_doc() doesn't set spaces, so the result is "And a third ." - assert [t.text for t in doc] == ["This", "is", "a sentence", ".", "This", "is", "another sentence", ".", "And a third ."] + assert [t.text for t in doc] == [ + "This", + "is", + "a sentence", + ".", + "This", + "is", + "another sentence", + ".", + "And a third .", + ]