Fiddle with nll loss in parser

Matthew Honnibal 2016-09-04 16:56:47 +02:00
parent ed23476c82
commit 2bfe184692
2 changed files with 50 additions and 17 deletions

@@ -2,6 +2,7 @@
 # cython: profile=True
 from libc.stdint cimport uint64_t
 from libc.string cimport memcpy, memset
+from libc.math cimport sqrt
 from cymem.cymem cimport Pool, Address
 from murmurhash.mrmr cimport hash64
@@ -12,6 +13,7 @@ from thinc.linalg cimport VecVec
 from thinc.structs cimport NeuralNetC, SparseArrayC, ExampleC
 from thinc.structs cimport FeatureC
 from thinc.extra.eg cimport Example
+from thinc.neural.forward cimport softmax
 from preshed.maps cimport map_get
 from preshed.maps cimport MapStruct
@@ -31,21 +33,48 @@ cdef class ParserPerceptron(AveragedPerceptron):
     def widths(self):
         return (self.extracter.nr_templ,)
 
-    def update(self, Example eg):
-        '''Does regression on negative cost. Sort of cute?'''
+    def update(self, Example eg, loss='regression'):
         self.time += 1
-        cdef weight_t loss = 0.0
         best = eg.best
-        for clas in range(eg.c.nr_class):
-            if not eg.c.is_valid[clas]:
-                continue
-            if eg.c.scores[clas] < eg.c.scores[best]:
-                continue
-            loss += (-eg.c.costs[clas] - eg.c.scores[clas]) ** 2
-            d_loss = 2 * (-eg.c.costs[clas] - eg.c.scores[clas])
+        guess = eg.guess
+        assert best >= 0, best
+        assert guess >= 0, guess
+        d_losses = {}
+        if loss == 'regression':
+            # Does regression on negative cost. Sort of cute?
+            # Clip to guess and best, to keep gradient sparse.
+            d_losses[guess] = -2 * (-eg.c.costs[guess] - eg.c.scores[guess])
+            d_losses[best] = -2 * (-eg.c.costs[best] - eg.c.scores[best])
+        elif loss == 'nll':
+            # Clip to guess and best, to keep gradient sparse.
+            if eg.c.scores[guess] == 0.0:
+                d_losses[guess] = 1.0
+                d_losses[best] = -1.0
+            else:
+                softmax(eg.c.scores, eg.c.nr_class)
+                for i in range(eg.c.nr_class):
+                    if eg.c.is_valid[i] \
+                    and eg.c.scores[i] >= eg.c.scores[best]:
+                        d_losses[i] = eg.c.scores[i] - (eg.c.costs[i] <= 0)
+        elif loss == 'hinge':
+            for i in range(eg.c.nr_class):
+                if eg.c.is_valid[i] \
+                and eg.c.costs[i] > 0 \
+                and eg.c.scores[i] > (eg.c.scores[best]-1):
+                    margin = eg.c.scores[i] - (eg.c.scores[best] - 1)
+                    d_losses[i] = margin
+                    d_losses[best] = min(-margin, d_losses.get(best, 0.0))
+        elif loss == 'perceptron':
+            if guess != best:
+                d_losses = {best: -1.0, guess: 1.0}
+        step = 0.0
+        i = 0
+        for clas, d_loss in d_losses.items():
             for feat in eg.c.features[:eg.c.nr_feat]:
-                self.update_weight(feat.key, clas, feat.value * -d_loss)
-        return int(loss)
+                step += abs(self.update_weight(feat.key, clas, feat.value * d_loss))
+                i += 1
+        self.total_L1 += self.l1_penalty * self.learn_rate
+        return sum(map(abs, d_losses.values()))
 
     cdef int set_featuresC(self, FeatureC* feats, const void* _state) nogil:
         cdef atom_t[CONTEXT_SIZE] context
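
For reference, the new 'nll' branch above is a softmax cross-entropy update: the scores are normalised with softmax, and each class's gradient is its probability minus an indicator of whether the class is gold-consistent (cost <= 0). The gradient is only computed for valid classes scoring at least as high as the best gold-consistent class, which keeps the update sparse. A minimal NumPy sketch of that computation (illustrative helper, not the spaCy/thinc API):

```python
import numpy as np

def nll_d_losses(scores, costs, is_valid):
    # 'best' mirrors eg.best: the highest-scoring valid class with zero cost.
    best = max((i for i in range(len(scores)) if is_valid[i] and costs[i] <= 0),
               key=lambda i: scores[i])
    probs = np.exp(scores - scores.max())   # numerically stable softmax
    probs /= probs.sum()
    d_losses = {}
    for i in range(len(scores)):
        # Only valid classes scoring at least as high as 'best' get a gradient,
        # matching the sparsity clipping in the update above.
        if is_valid[i] and scores[i] >= scores[best]:
            d_losses[i] = probs[i] - (costs[i] <= 0)
    return d_losses

scores = np.array([2.0, 1.0, 0.5])
costs = np.array([1.0, 0.0, 0.0])   # classes 1 and 2 are gold-consistent
print(nll_d_losses(scores, costs, [True, True, True]))
# ~{0: 0.63, 1: -0.77}: weight moves off the wrong guess, onto the best class
```

The `eg.c.scores[guess] == 0.0` special case falls back to a perceptron-style pair update, apparently guarding the untrained state where all scores are still zero.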

@@ -21,6 +21,7 @@ import json
 import sys
 from .nonproj import PseudoProjectivity
 import random
+import numpy.random
 from cymem.cymem cimport Pool, Address
 from murmurhash.mrmr cimport hash64
@@ -203,15 +204,18 @@ cdef class Parser:
         cdef Transition action
         while not stcls.is_final():
             eg.c.nr_feat = self.model.set_featuresC(eg.c.features, stcls.c)
+            self.model.dropoutC(eg.c.features,
+                0.5, eg.c.nr_feat)
+            if eg.c.features[0].i == 1:
+                eg.c.features[0].value = 1.0
             #for i in range(eg.c.nr_feat):
             #    if eg.c.features[i].value != 0:
             #        self.model.apply_L1(eg.c.features[i].key)
             self.model.set_scoresC(eg.c.scores, eg.c.features, eg.c.nr_feat)
             self.moves.set_costs(eg.c.is_valid, eg.c.costs, stcls, gold)
             for i in range(self.moves.n_moves):
                 if eg.c.costs[i] < 0:
                     eg.c.costs[i] = 0
             action = self.moves.c[eg.guess]
             action.do(stcls.c, action.label)
-            loss += self.model.update(eg)
+            loss += self.model.update(eg, loss='nll')
             eg.reset()
         return loss
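
Two details of the new training loop interact with the loss change. Negative costs are clipped to zero before the update, because the 'nll' gradient treats `costs[i] <= 0` as the indicator of a correct class. And features are now dropped at rate 0.5 before scoring, with the first feature (presumably a constant bias template, index 1) reset to 1.0 so it always survives. A rough sketch of what feature-level dropout amounts to on a sparse feature array (hypothetical helper, not thinc's dropoutC):

```python
import random

def dropout_features(features, rate=0.5):
    # features: list of (key, value) pairs standing in for the FeatureC array.
    # Each feature's value is zeroed independently with probability `rate`,
    # so every update sees a different random subset of the sparse features.
    return [(key, 0.0 if random.random() < rate else value)
            for key, value in features]

feats = [(101, 1.0), (202, 1.0), (303, 1.0)]
print(dropout_features(feats))   # e.g. [(101, 1.0), (202, 0.0), (303, 1.0)]
```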