From 2df563ad24d7df2f6368b5ff66657abd6cc0f9d3 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Sun, 23 Jul 2017 14:10:51 +0200
Subject: [PATCH] Remove optimization for textcat that caused loading problem

---
 spacy/_ml.py | 67 +++++++++++++---------------------------------------
 1 file changed, 16 insertions(+), 51 deletions(-)

diff --git a/spacy/_ml.py b/spacy/_ml.py
index f3cfe069f..2d0910a53 100644
--- a/spacy/_ml.py
+++ b/spacy/_ml.py
@@ -15,6 +15,7 @@ from thinc.describe import Dimension, Synapses, Biases, Gradient
 from thinc.neural._classes.affine import _set_dimensions_if_needed
 from thinc.api import FeatureExtracter, with_getitem
 from thinc.neural.pooling import Pooling, max_pool, mean_pool
+from thinc.linear.linear import LinearModel
 
 from .attrs import ID, ORTH, LOWER, NORM, PREFIX, SUFFIX, SHAPE, TAG, DEP
 from .tokens.doc import Doc
@@ -365,51 +366,6 @@ def preprocess_doc(docs, drop=0.):
     return (keys, vals, lengths), None
 
 
-# This belongs in thinc
-def wrap(func, *child_layers):
-    model = layerize(func)
-    model._layers.extend(child_layers)
-    def on_data(self, X, y):
-        for child in self._layers:
-            for hook in child.on_data_hooks:
-                hook(child, X, y)
-    model.on_data_hooks.append(on_data)
-    return model
-
-# This belongs in thinc
-def uniqued(layer, column=0):
-    '''Group inputs to a layer, so that the layer only has to compute
-    for the unique values. The data is transformed back before output, and the same
-    transformation is applied for the gradient. Effectively, this is a cache
-    local to each minibatch.
-
-    The uniqued wrapper is useful for word inputs, because common words are
-    seen often, but we may want to compute complicated features for the words,
-    using e.g. character LSTM.
-    '''
-    def uniqued_fwd(X, drop=0.):
-        keys = X[:, column]
-        if not isinstance(keys, numpy.ndarray):
-            keys = keys.get()
-        uniq_keys, ind, inv, counts = numpy.unique(keys, return_index=True,
-                                                   return_inverse=True,
-                                                   return_counts=True)
-        Y_uniq, bp_Y_uniq = layer.begin_update(X[ind], drop=drop)
-        Y = Y_uniq[inv].reshape((X.shape[0],) + Y_uniq.shape[1:])
-        def uniqued_bwd(dY, sgd=None):
-            dY_uniq = layer.ops.allocate(Y_uniq.shape, dtype='f')
-            layer.ops.scatter_add(dY_uniq, inv, dY)
-            d_uniques = bp_Y_uniq(dY_uniq, sgd=sgd)
-            if d_uniques is not None:
-                dX = (d_uniques / counts)[inv]
-                return dX
-            else:
-                return None
-        return Y, uniqued_bwd
-    model = wrap(uniqued_fwd, layer)
-    return model
-
-
 def build_text_classifier(nr_class, width=64, **cfg):
     nr_vector = cfg.get('nr_vector', 1000)
     with Model.define_operators({'>>': chain, '+': add, '|': concatenate, '**': clone}):
@@ -418,23 +374,32 @@ def build_text_classifier(nr_class, width=64, **cfg):
         embed_suffix = HashEmbed(width//2, nr_vector, column=3)
         embed_shape = HashEmbed(width//2, nr_vector, column=4)
 
-        model = (
+        cnn_model = (
             FeatureExtracter([ORTH, LOWER, PREFIX, SUFFIX, SHAPE])
             >> _flatten_add_lengths
            >> with_getitem(0,
-                uniqued(
-                    (embed_lower | embed_prefix | embed_suffix | embed_shape)
-                    >> Maxout(width, width+(width//2)*3)
-                )
+                (embed_lower | embed_prefix | embed_suffix | embed_shape)
+                >> Maxout(width, width+(width//2)*3)
                 >> Residual(ExtractWindow(nW=1) >> ReLu(width, width*3))
                 >> Residual(ExtractWindow(nW=1) >> ReLu(width, width*3))
                 >> Residual(ExtractWindow(nW=1) >> ReLu(width, width*3))
            )
            >> Pooling(mean_pool, max_pool)
            >> Residual(ReLu(width*2, width*2))
-           >> zero_init(Affine(nr_class, width*2, drop_factor=0.0))
+        )
+        linear_model = (
+            _preprocess_doc
+            >> LinearModel(nr_class)
            >> logistic
        )
+
+        model = (
+            #(linear_model | cnn_model)
+            cnn_model
+            >> zero_init(Affine(nr_class, width*2+nr_class, drop_factor=0.0))
+            >> logistic
+        )
+    model.lsuv = False
    return model
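
Note (not part of the patch): the removed uniqued wrapper implements a cache
local to each minibatch, as its docstring says. The wrapped layer runs only on
rows with unique key values, and the outputs are scattered back to the
original row order; on the backward pass, gradients for duplicated rows are
summed via scatter_add and divided by the duplicate counts. Below is a
minimal numpy-only sketch of the forward trick, not the thinc implementation:
expensive_layer is a hypothetical stand-in for the wrapped layer, and the
sketch assumes, as the removed code does, that the key column uniquely
determines the whole feature row (true for word IDs).

    import numpy

    def expensive_layer(X):
        # Hypothetical stand-in for the wrapped layer's forward pass.
        return X * 2.0

    def uniqued_forward(X, column=0):
        # `ind` picks one representative row per unique key; `inv` maps
        # every original row back to its representative.
        keys = X[:, column]
        _, ind, inv = numpy.unique(keys, return_index=True,
                                   return_inverse=True)
        # Compute only on the representative rows, then broadcast the
        # results back to the original row order.
        Y_uniq = expensive_layer(X[ind])
        return Y_uniq[inv]

    X = numpy.array([[3., 1.], [5., 2.], [3., 1.]])
    print(uniqued_forward(X))  # rows 0 and 2 share key 3.0: one computation serves both

The commit subject suggests why the wrapper had to go: wrap and uniqued were
defined locally in spacy/_ml.py, so (presumably) models saved with them could
not be reconstructed at load time, hence the plain embed >> Maxout pipeline here.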