Mirror of https://github.com/explosion/spaCy.git
Move characters_loss function, add window option
parent 5a601ef46a
commit 77af446d04

spacy/_ml.py | 44
@@ -5,7 +5,7 @@ import numpy
 from thinc.v2v import Model, Maxout, Softmax, Affine, ReLu
 from thinc.i2v import HashEmbed, StaticVectors
 from thinc.t2t import ExtractWindow, ParametricAttention
-from thinc.t2v import Pooling, sum_pool, mean_pool
+from thinc.t2v import Pooling, sum_pool, mean_pool, max_pool
 from thinc.misc import Residual
 from thinc.misc import LayerNorm as LN
 from thinc.misc import FeatureExtracter
@@ -15,7 +15,7 @@ from thinc.api import uniqued, wrap, noop
 from thinc.api import with_square_sequences
 from thinc.linear.linear import LinearModel
 from thinc.neural.ops import NumpyOps, CupyOps
-from thinc.neural.util import get_array_module, copy_array
+from thinc.neural.util import get_array_module, copy_array, to_categorical
 from thinc.neural.optimizers import Adam
 
 from thinc.t2t import prepare_self_attention, MultiHeadedAttention
@@ -374,10 +374,10 @@ def Tok2Vec_chars_bilstm(width, embed_size, **kwargs):
 
 
 
-def CNN(width, depth, pieces):
+def CNN(width, depth, pieces, nW=1):
     layer = chain(
-        ExtractWindow(nW=1),
-        LN(Maxout(width, width * 3, pieces=pieces)))
+        ExtractWindow(nW=nW),
+        LN(Maxout(width, width * (nW*2+1), pieces=pieces)))
     return clone(Residual(layer), depth)
 
 
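Note: the Maxout input width changes from the hard-coded width * 3 to width * (nW*2+1) because ExtractWindow concatenates each token's vector with those of its nW neighbours on either side. A minimal numpy sketch of that shape contract (illustrative only, not thinc's actual implementation):

import numpy as np

def extract_window(X, nW=1):
    # Concatenate each row with its nW neighbours on each side,
    # zero-padding at the sequence edges: (n, d) -> (n, d * (2*nW + 1)).
    n, d = X.shape
    padded = np.vstack([np.zeros((nW, d)), X, np.zeros((nW, d))])
    return np.hstack([padded[i : i + n] for i in range(2 * nW + 1)])

X = np.random.rand(7, 4)
assert extract_window(X, nW=2).shape == (7, 4 * (2 * 2 + 1))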
@@ -410,9 +410,11 @@ def Tok2Vec(width, embed_size, **kwargs):
     conv_depth = kwargs.get("conv_depth", 4)
     bilstm_depth = util.env_opt("bilstm_depth", kwargs.get("bilstm_depth", 0))
     self_attn_depth = util.env_opt("self_attn_depth", kwargs.get("self_attn_depth", 0))
+    conv_window = util.env_opt("conv_window", kwargs.get("cnn_window", 1))
     kwargs.setdefault("bilstm_depth", bilstm_depth)
     kwargs.setdefault("self_attn_depth", self_attn_depth)
     kwargs.setdefault("char_embed", char_embed)
+    kwargs.setdefault("conv_window", conv_window)
     if char_embed and self_attn_depth:
         return Tok2Vec_chars_selfattention(width, embed_size, **kwargs)
     elif char_embed and bilstm_depth:
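Note: util.env_opt lets an environment variable override the keyword argument, so the window can be tuned without touching code. A simplified sketch of that lookup (assuming the real helper's SPACY_-prefixed names and type coercion; details may differ):

import os

def env_opt(name, default=None):
    # Simplified: spaCy's helper also coerces the value to the type
    # of the default. An environment variable beats the passed-in default.
    env_name = "SPACY_" + name.upper()
    if env_name in os.environ:
        return type(default)(os.environ[env_name])
    return default

conv_window = env_opt("conv_window", 1)  # e.g. SPACY_CONV_WINDOW=3 would override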
@@ -459,16 +461,12 @@ def Tok2Vec(width, embed_size, **kwargs):
     else:
         embed = norm
 
-    convolution = Residual(
-        ExtractWindow(nW=1)
-        >> LN(Maxout(width, width * 3, pieces=cnn_maxout_pieces))
-    )
     tok2vec = (
         FeatureExtracter(cols)
         >> with_flatten(
             embed
-            >> CNN(width, conv_depth, cnn_maxout_pieces)
-        )
+            >> CNN(width, conv_depth, cnn_maxout_pieces, nW=conv_window),
+            pad=conv_depth * conv_window)
     )
 
     if bilstm_depth >= 1:
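Note: pad=conv_depth * conv_window tracks the receptive field of the stacked convolutions: each layer sees conv_window tokens on either side, so depth layers need that much padding when sequences are flattened into one array, keeping windows from bleeding across sequence boundaries. The arithmetic, as a sketch:

def cnn_one_sided_context(depth, window):
    # Each of the `depth` layers extends the one-sided receptive
    # field by `window` tokens, so the flattened batch needs
    # depth * window rows of padding between sequences.
    return depth * window

assert cnn_one_sided_context(4, 1) == 4  # the previous hard-coded behaviour
assert cnn_one_sided_context(4, 2) == 8  # doubles with a wider window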
@@ -628,12 +626,13 @@ class MultiSoftmax(Affine):
         self.nI = nI
 
     def predict(self, input__BI):
-        output__BO = self.ops.affine(self.W, self.b, input__BI)
+        logits = self.ops.affine(self.W, self.b, input__BI)
+        outputs = []
         i = 0
         for out_size in self.out_sizes:
-            self.ops.softmax(output__BO[:, i : i + out_size], inplace=True)
+            outputs.append(self.ops.softmax(logits[:, i : i+out_size]))
             i += out_size
-        return output__BO
+        return self.ops.xp.hstack(outputs)
 
     def begin_update(self, input__BI, drop=0.0):
         output__BO = self.predict(input__BI)
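Note: predict now softmaxes each column block into a fresh array and concatenates the results, rather than mutating the affine output in place through slices. A standalone numpy sketch of the multi-softmax semantics (illustrative, independent of thinc's ops):

import numpy as np

def multi_softmax(logits, out_sizes):
    # One independent softmax per contiguous block of columns;
    # blocks are concatenated back to (batch, sum(out_sizes)).
    outputs, i = [], 0
    for out_size in out_sizes:
        block = logits[:, i : i + out_size]
        block = block - block.max(axis=1, keepdims=True)  # numerical stability
        e = np.exp(block)
        outputs.append(e / e.sum(axis=1, keepdims=True))
        i += out_size
    return np.hstack(outputs)

probs = multi_softmax(np.random.rand(2, 5), out_sizes=(2, 3))
assert np.allclose(probs[:, :2].sum(axis=1), 1.0)  # each block sums to 1
assert np.allclose(probs[:, 2:].sum(axis=1), 1.0)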
@@ -825,12 +824,12 @@ def build_simple_cnn_text_classifier(tok2vec, nr_class, exclusive_classes=False,
     """
     with Model.define_operators({">>": chain}):
         if exclusive_classes:
-            output_layer = Softmax(nr_class, tok2vec.nO)
+            output_layer = Softmax(nr_class, tok2vec.nO*3)
         else:
             output_layer = (
-                zero_init(Affine(nr_class, tok2vec.nO, drop_factor=0.0)) >> logistic
+                zero_init(Affine(nr_class, tok2vec.nO*3, drop_factor=0.0)) >> logistic
             )
-        model = tok2vec >> flatten_add_lengths >> Pooling(mean_pool) >> output_layer
+        model = tok2vec >> flatten_add_lengths >> Pooling(sum_pool, mean_pool, max_pool) >> output_layer
         model.tok2vec = chain(tok2vec, flatten)
         model.nO = nr_class
         return model
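Note: Pooling(sum_pool, mean_pool, max_pool) concatenates three reductions over the token axis, which is why both output layers now take tok2vec.nO*3 inputs. The shape effect, sketched in plain numpy:

import numpy as np

def pool_sum_mean_max(X):
    # (n_tokens, width) -> (3 * width,): sum-, mean- and max-pooling
    # concatenated along the feature axis.
    return np.concatenate([X.sum(axis=0), X.mean(axis=0), X.max(axis=0)])

X = np.random.rand(6, 128)
assert pool_sum_mean_max(X).shape == (3 * 128,)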
@@ -1054,6 +1053,17 @@ class CharacterEmbed(Model):
         return output, backprop_character_embed
 
 
+def get_characters_loss(ops, docs, prediction, nr_char=10):
+    target_ids = numpy.vstack([doc.to_utf8_array(nr_char=nr_char) for doc in docs])
+    target_ids = target_ids.reshape((-1,))
+    target = ops.asarray(to_categorical(target_ids, nb_classes=256), dtype="f")
+    target = target.reshape((-1, 256*nr_char))
+    diff = prediction - target
+    loss = (diff**2).sum()
+    d_target = diff / float(prediction.shape[0])
+    return loss, d_target
+
+
 def get_cossim_loss(yh, y, ignore_zeros=False):
     xp = get_array_module(yh)
     # Find the zero vectors
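Note: get_characters_loss is a squared-error loss against one-hot UTF-8 byte targets (nr_char bytes per token, 256 classes per byte), with the gradient scaled by the number of rows. A self-contained numpy version of the same computation, with the byte ids passed in directly instead of read from Doc.to_utf8_array:

import numpy as np

def characters_loss(prediction, target_ids, nr_char=10, nb_classes=256):
    # prediction: (n_tokens, 256 * nr_char) predicted byte scores.
    # target_ids: (n_tokens, nr_char) integer byte ids in [0, 256).
    target = np.eye(nb_classes, dtype="f")[target_ids.reshape(-1)]  # one-hot
    target = target.reshape((-1, nb_classes * nr_char))  # regroup per token
    diff = prediction - target
    loss = (diff ** 2).sum()
    d_target = diff / float(prediction.shape[0])  # gradient w.r.t. prediction
    return loss, d_target

pred = np.random.rand(4, 256 * 10).astype("f")
ids = np.random.randint(0, 256, size=(4, 10))
loss, d_pred = characters_loss(pred, ids)
assert d_pred.shape == pred.shape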