Use Mish layer if pieces==1 in CNN

This commit is contained in:
Matthew Honnibal 2019-10-22 03:26:27 +02:00
parent 7ef3bcdc1c
commit ab7f85dfa2

View File

@@ -2,7 +2,7 @@
from __future__ import unicode_literals
import numpy
from thinc.v2v import Model, Maxout, Softmax, Affine, ReLu
from thinc.v2v import Model, Maxout, Softmax, Affine, ReLu, Mish
from thinc.i2v import HashEmbed, StaticVectors
from thinc.t2t import ExtractWindow, ParametricAttention
from thinc.t2v import Pooling, sum_pool, mean_pool, max_pool
@@ -55,7 +55,8 @@ def create_default_optimizer(ops, **cfg):
eps = util.env_opt("optimizer_eps", 1e-8)
L2 = util.env_opt("L2_penalty", 1e-6)
max_grad_norm = util.env_opt("grad_norm_clip", 1.0)
optimizer = Adam(ops, learn_rate, L2=L2, beta1=beta1, beta2=beta2, eps=eps)
optimizer = Adam(ops, learn_rate, L2=L2, beta1=beta1, beta2=beta2, eps=eps,
lookahead_k=6, lookahead_alpha=0.5, use_lars=True, use_radam=True)
optimizer.max_grad_norm = max_grad_norm
optimizer.device = ops.device
return optimizer
@@ -375,6 +376,12 @@ def Tok2Vec_chars_bilstm(width, embed_size, **kwargs):
def CNN(width, depth, pieces, nW=1):
if pieces == 1:
layer = chain(
ExtractWindow(nW=nW),
LN(Mish(width, width * (nW*2+1)))
)
else:
layer = chain(
ExtractWindow(nW=nW),
LN(Maxout(width, width * (nW*2+1), pieces=pieces)))