From 0eec1d12affa9c8301612cfb5cddf706d2628e2b Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Wed, 27 May 2015 01:14:07 +0200 Subject: [PATCH] * Add comment about zipf reweighting --- spacy/_ml.pyx | 3 +++ 1 file changed, 3 insertions(+) diff --git a/spacy/_ml.pyx b/spacy/_ml.pyx index a2b943589..3a439e2ba 100644 --- a/spacy/_ml.pyx +++ b/spacy/_ml.pyx @@ -47,6 +47,9 @@ cdef class Model: @cython.cdivision @cython.boundscheck(False) cdef int regularize(self, Feature* feats, int n, int a=3) except -1: + # Use the Zipfian corruptions technique from here: + # http://www.aclweb.org/anthology/N13-1077 + # This seems to be worth a 0.1-0.3% improvement on out-of-domain (OOD) data. cdef int i cdef long[:] zipfs = numpy.random.zipf(a, n) for i in range(n):