* Remove regularization cruft from _ml, move score from .pxd file to .pyx

2025-07-10 16:22:29 +03:00 · 2015-05-31 18:48:05 +02:00 · 2015-05-31 18:48:05 +02:00 · d82f9d958d
commit d82f9d958d
parent 08044ea70c
2 changed files with 7 additions and 23 deletions
--- a/spacy/_ml.pxd
+++ b/spacy/_ml.pxd
@@ -18,18 +18,10 @@ cdef int arg_max(const weight_t* scores, const int n_classes) nogil
 cdef class Model:
    cdef int n_classes
-    cdef int regularize(self, Feature* feats, int n, int a=*) except -1
+    cdef const weight_t* score(self, atom_t* context, bint regularize) except NULL
    cdef int update(self, atom_t* context, class_t guess, class_t gold, int cost) except -1
    cdef object model_loc
    cdef Extractor _extractor
    cdef LinearModel _model
-    cdef inline const weight_t* score(self, atom_t* context, bint regularize) except NULL:
-        cdef int n_feats
-        feats = self._extractor.get_feats(context, &n_feats)
-        if regularize:
-            self.regularize(feats, n_feats, 3)
-        return self._model.get_scores(feats, n_feats)
--- a/spacy/_ml.pyx
+++ b/spacy/_ml.pyx
@@ -33,6 +33,11 @@ cdef class Model:
        if self.model_loc and path.exists(self.model_loc):
            self._model.load(self.model_loc, freq_thresh=0)
+    cdef const weight_t* score(self, atom_t* context, bint regularize) except NULL:
+        cdef int n_feats
+        feats = self._extractor.get_feats(context, &n_feats)
+        return self._model.get_scores(feats, n_feats)
    cdef int update(self, atom_t* context, class_t guess, class_t gold, int cost) except -1:
        cdef int n_feats
        if cost == 0:
@@ -44,19 +49,6 @@ cdef class Model:
            count_feats(counts[guess], feats, n_feats, -cost)
            self._model.update(counts)
-    @cython.cdivision
-    @cython.boundscheck(False)
-    cdef int regularize(self, Feature* feats, int n, int a=3) except -1:
-        pass
-        # Disable this for now, while we investigate effect.
-        # Use the Zipfian corruptions technique from here:
-        # http://www.aclweb.org/anthology/N13-1077
-        # This seems good for 0.1 - 0.3 % on OOD data.
-        #cdef int i
-        #cdef long[:] zipfs = numpy.random.zipf(a, n)
-        #for i in range(n):
-        #    feats[i].value *= 1 / zipfs[i]
    def end_training(self):
        self._model.end_training()
        self._model.dump(self.model_loc, freq_thresh=0)