Add MultiSoftmax class
Add a new class for the Tagger model, MultiSoftmax. This allows softmax prediction over several categorical variables on the same output layer, e.g. one variable with 3 classes and another with 4. This makes a layer with 7 output neurons, whose columns we softmax into two separate probability distributions.
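A minimal sketch of the idea in plain numpy (illustrative only; none of these names appear in the commit): with out_sizes = (3, 4), the 7 output columns are normalised as two independent probability distributions.

import numpy as np

def multi_softmax(scores, out_sizes):
    # Softmax each block of columns independently: for out_sizes=(3, 4),
    # columns 0:3 and 3:7 each sum to 1 per row.
    out = np.empty_like(scores)
    start = 0
    for size in out_sizes:
        block = scores[:, start:start + size]
        # Subtract the row-wise max for numerical stability.
        e = np.exp(block - block.max(axis=1, keepdims=True))
        out[:, start:start + size] = e / e.sum(axis=1, keepdims=True)
        start += size
    return out

scores = np.random.randn(2, 7)
probs = multi_softmax(scores, (3, 4))
assert np.allclose(probs[:, :3].sum(axis=1), 1.0)
assert np.allclose(probs[:, 3:].sum(axis=1), 1.0)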
This commit is contained in:
parent 052c45dc2f
commit b10d0cce05

spacy/_ml.py | 44
@@ -444,7 +444,46 @@ def getitem(i):
     return layerize(getitem_fwd)
 
 
-def build_tagger_model(nr_class, **cfg):
+@describe.attributes(
+    W=Synapses("Weights matrix",
+        lambda obj: (obj.nO, obj.nI),
+        lambda W, ops: None)
+)
+class MultiSoftmax(Affine):
+    '''Neural network layer that predicts several multi-class attributes at once.
+
+    For instance, we might predict one attribute with 6 classes, and another with 5.
+    We predict the 11 neurons required for this, and then softmax them such
+    that columns 0-6 make a probability distribution and columns 6-11 make another.
+    '''
+    name = 'multisoftmax'
+
+    def __init__(self, out_sizes, nI=None, **kwargs):
+        Model.__init__(self, **kwargs)
+        self.out_sizes = out_sizes
+        self.nO = sum(out_sizes)
+        self.nI = nI
+
+    def predict(self, input__BI):
+        output__BO = self.ops.affine(self.W, self.b, input__BI)
+        i = 0
+        for out_size in self.out_sizes:
+            self.ops.softmax(output__BO[:, i : i+out_size], inplace=True)
+            i += out_size
+        return output__BO
+
+    def begin_update(self, input__BI, drop=0.):
+        output__BO = self.predict(input__BI)
+
+        def finish_update(grad__BO, sgd=None):
+            self.d_W += self.ops.gemm(grad__BO, input__BI, trans1=True)
+            self.d_b += grad__BO.sum(axis=0)
+            grad__BI = self.ops.gemm(grad__BO, self.W)
+            if sgd is not None:
+                sgd(self._mem.weights, self._mem.gradient, key=self.id)
+            return grad__BI
+        return output__BO, finish_update
+
+
+def build_tagger_model(class_nums, **cfg):
     embed_size = util.env_opt('embed_size', 7000)
     if 'token_vector_width' in cfg:
         token_vector_width = cfg['token_vector_width']
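The backward pass in finish_update is the standard affine gradient. A self-contained numpy mirror of the shapes involved (a sketch, assuming ops.affine computes X @ W.T + b, which is the layout implied by the (nO, nI) weight shape and the two gemm calls above):

import numpy as np

B, nI, nO = 4, 16, 7         # batch size, input width, summed output width
W = np.random.randn(nO, nI)  # layout matches Synapses((obj.nO, obj.nI))
b = np.zeros(nO)
X = np.random.randn(B, nI)   # input__BI

Y = X @ W.T + b              # forward: assumed equivalent of ops.affine(W, b, X)

dY = np.random.randn(B, nO)  # grad__BO, gradient w.r.t. the layer output
d_W = dY.T @ X               # ops.gemm(grad__BO, input__BI, trans1=True)
d_b = dY.sum(axis=0)         # grad__BO.sum(axis=0)
dX = dY @ W                  # ops.gemm(grad__BO, self.W)

Note that finish_update does not differentiate through the softmax itself; this works when the caller supplies grad__BO as probabilities minus one-hot targets, which is exactly the cross-entropy gradient with respect to the pre-softmax scores.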
@@ -459,7 +498,8 @@ def build_tagger_model(nr_class, **cfg):
     tok2vec = Tok2Vec(token_vector_width, embed_size,
                       subword_features=subword_features,
                       pretrained_vectors=pretrained_vectors)
-    softmax = with_flatten(Softmax(nr_class, token_vector_width))
+    softmax = with_flatten(
+        MultiSoftmax(class_nums, token_vector_width))
     model = (
         tok2vec
         >> softmax
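With this second hunk, build_tagger_model expects a list of class counts rather than a single nr_class integer. A hypothetical call (the sizes are illustrative, not from the commit):

# e.g. one variable with 45 classes and one with 17 -> 62 output neurons
model = build_tagger_model([45, 17], token_vector_width=96)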