mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-15 06:09:01 +03:00
Use Mish layer if pieces==1 in CNN
This commit is contained in:
parent
7ef3bcdc1c
commit
ab7f85dfa2
17
spacy/_ml.py
17
spacy/_ml.py
|
@@ -2,7 +2,7 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import numpy
|
import numpy
|
||||||
from thinc.v2v import Model, Maxout, Softmax, Affine, ReLu
|
from thinc.v2v import Model, Maxout, Softmax, Affine, ReLu, Mish
|
||||||
from thinc.i2v import HashEmbed, StaticVectors
|
from thinc.i2v import HashEmbed, StaticVectors
|
||||||
from thinc.t2t import ExtractWindow, ParametricAttention
|
from thinc.t2t import ExtractWindow, ParametricAttention
|
||||||
from thinc.t2v import Pooling, sum_pool, mean_pool, max_pool
|
from thinc.t2v import Pooling, sum_pool, mean_pool, max_pool
|
||||||
|
@@ -55,7 +55,8 @@ def create_default_optimizer(ops, **cfg):
|
||||||
eps = util.env_opt("optimizer_eps", 1e-8)
|
eps = util.env_opt("optimizer_eps", 1e-8)
|
||||||
L2 = util.env_opt("L2_penalty", 1e-6)
|
L2 = util.env_opt("L2_penalty", 1e-6)
|
||||||
max_grad_norm = util.env_opt("grad_norm_clip", 1.0)
|
max_grad_norm = util.env_opt("grad_norm_clip", 1.0)
|
||||||
optimizer = Adam(ops, learn_rate, L2=L2, beta1=beta1, beta2=beta2, eps=eps)
|
optimizer = Adam(ops, learn_rate, L2=L2, beta1=beta1, beta2=beta2, eps=eps,
|
||||||
|
lookahead_k=6, lookahead_alpha=0.5, use_lars=True, use_radam=True)
|
||||||
optimizer.max_grad_norm = max_grad_norm
|
optimizer.max_grad_norm = max_grad_norm
|
||||||
optimizer.device = ops.device
|
optimizer.device = ops.device
|
||||||
return optimizer
|
return optimizer
|
||||||
|
@@ -375,9 +376,15 @@ def Tok2Vec_chars_bilstm(width, embed_size, **kwargs):
|
||||||
|
|
||||||
|
|
||||||
def CNN(width, depth, pieces, nW=1):
|
def CNN(width, depth, pieces, nW=1):
|
||||||
layer = chain(
|
if pieces == 1:
|
||||||
ExtractWindow(nW=nW),
|
layer = chain(
|
||||||
LN(Maxout(width, width * (nW*2+1), pieces=pieces)))
|
ExtractWindow(nW=nW),
|
||||||
|
LN(Mish(width, width * (nW*2+1)))
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
layer = chain(
|
||||||
|
ExtractWindow(nW=nW),
|
||||||
|
LN(Maxout(width, width * (nW*2+1), pieces=pieces)))
|
||||||
return clone(Residual(layer), depth)
|
return clone(Residual(layer), depth)
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user