Auto-format [ci skip]

This commit is contained in:
Ines Montani 2019-10-30 19:27:18 +01:00
parent afe4a428f7
commit 5e9849b60f
3 changed files with 24 additions and 15 deletions

View File

@@ -360,7 +360,7 @@ def Tok2Vec(width, embed_size, **kwargs):
"config": { "config": {
"vectors_name": pretrained_vectors, "vectors_name": pretrained_vectors,
"width": width, "width": width,
"column": cols.index("ID") "column": cols.index("ID"),
}, },
} }
if cnn_maxout_pieces >= 2: if cnn_maxout_pieces >= 2:

View File

@@ -45,12 +45,15 @@ def MultiHashEmbed(config):
norm = HashEmbed(width, rows, column=cols.index("NORM"), name="embed_norm") norm = HashEmbed(width, rows, column=cols.index("NORM"), name="embed_norm")
if config["use_subwords"]: if config["use_subwords"]:
prefix = HashEmbed(width, rows // 2, prefix = HashEmbed(
column=cols.index("PREFIX"), name="embed_prefix") width, rows // 2, column=cols.index("PREFIX"), name="embed_prefix"
suffix = HashEmbed(width, rows // 2, )
column=cols.index("SUFFIX"), name="embed_suffix") suffix = HashEmbed(
shape = HashEmbed(width, rows // 2, width, rows // 2, column=cols.index("SUFFIX"), name="embed_suffix"
column=cols.index("SHAPE"), name="embed_shape") )
shape = HashEmbed(
width, rows // 2, column=cols.index("SHAPE"), name="embed_shape"
)
if config.get("@pretrained_vectors"): if config.get("@pretrained_vectors"):
glove = make_layer(config["@pretrained_vectors"]) glove = make_layer(config["@pretrained_vectors"])
mix = make_layer(config["@mix"]) mix = make_layer(config["@mix"])
@@ -60,20 +63,16 @@ def MultiHashEmbed(config):
mix._layers[0].nI = width * 5 mix._layers[0].nI = width * 5
layer = uniqued( layer = uniqued(
(glove | norm | prefix | suffix | shape) >> mix, (glove | norm | prefix | suffix | shape) >> mix,
column=cols.index("ORTH") column=cols.index("ORTH"),
) )
elif config["use_subwords"]: elif config["use_subwords"]:
mix._layers[0].nI = width * 4 mix._layers[0].nI = width * 4
layer = uniqued( layer = uniqued(
(norm | prefix | suffix | shape) >> mix, (norm | prefix | suffix | shape) >> mix, column=cols.index("ORTH")
column=cols.index("ORTH")
) )
elif config["@pretrained_vectors"]: elif config["@pretrained_vectors"]:
mix._layers[0].nI = width * 2 mix._layers[0].nI = width * 2
embed = uniqued( embed = uniqued((glove | norm) >> mix, column=cols.index("ORTH"),)
(glove | norm) >> mix,
column=cols.index("ORTH"),
)
else: else:
embed = norm embed = norm
layer.cfg = config layer.cfg = config

View File

@@ -21,4 +21,14 @@ def doc(en_tokenizer):
def test_merge_subtokens(doc): def test_merge_subtokens(doc):
doc = merge_subtokens(doc) doc = merge_subtokens(doc)
# get_doc() doesn't set spaces, so the result is "And a third ." # get_doc() doesn't set spaces, so the result is "And a third ."
assert [t.text for t in doc] == ["This", "is", "a sentence", ".", "This", "is", "another sentence", ".", "And a third ."] assert [t.text for t in doc] == [
"This",
"is",
"a sentence",
".",
"This",
"is",
"another sentence",
".",
"And a third .",
]