From 633a75c7e06fb9b61091412c6cf7add92eadc368 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Wed, 18 Oct 2017 21:45:01 +0200
Subject: [PATCH 1/9] Break parser batches into sub-batches, sorted by length.

---
 spacy/syntax/nn_parser.pyx | 91 +++++++++++++++++++-------------------
 1 file changed, 45 insertions(+), 46 deletions(-)

diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx
index 1f4918935..f8e1baf35 100644
--- a/spacy/syntax/nn_parser.pyx
+++ b/spacy/syntax/nn_parser.pyx
@@ -9,6 +9,7 @@ from collections import Counter, OrderedDict
 import ujson
 import json
 import contextlib
+import numpy
 
 from libc.math cimport exp
 cimport cython
@@ -27,7 +28,7 @@ from libc.string cimport memset, memcpy
 from libc.stdlib cimport malloc, calloc, free
 from thinc.typedefs cimport weight_t, class_t, feat_t, atom_t, hash_t
 from thinc.linear.avgtron cimport AveragedPerceptron
-from thinc.linalg cimport VecVec
+from thinc.linalg cimport Vec, VecVec
 from thinc.structs cimport SparseArrayC, FeatureC, ExampleC
 from thinc.extra.eg cimport Example
 from thinc.extra.search cimport Beam
@@ -288,6 +289,8 @@ cdef class Parser:
             zero_init(Affine(nr_class, hidden_width, drop_factor=0.0))
         )
         upper.is_noop = False
+        print(upper._layers)
+        print(upper._layers[0]._layers)
 
         # TODO: This is an unfortunate hack atm!
         # Used to set input dimensions in network.
@@ -391,19 +394,22 @@ cdef class Parser:
         beam_density = self.cfg.get('beam_density', 0.0)
         cdef Doc doc
         cdef Beam beam
-        for docs in cytoolz.partition_all(batch_size, docs):
-            docs = list(docs)
-            if beam_width == 1:
-                parse_states = self.parse_batch(docs)
-                beams = []
-            else:
-                beams = self.beam_parse(docs,
-                            beam_width=beam_width, beam_density=beam_density)
-                parse_states = []
-                for beam in beams:
-                    parse_states.append(beam.at(0))
-            self.set_annotations(docs, parse_states)
-            yield from docs
+        for batch in cytoolz.partition_all(batch_size, docs):
+            batch = list(batch)
+            by_length = sorted(list(batch), key=lambda doc: len(doc))
+            for subbatch in cytoolz.partition_all(32, by_length):
+                subbatch = list(subbatch)
+                if beam_width == 1:
+                    parse_states = self.parse_batch(subbatch)
+                    beams = []
+                else:
+                    beams = self.beam_parse(subbatch,
+                                beam_width=beam_width, beam_density=beam_density)
+                    parse_states = []
+                    for beam in beams:
+                        parse_states.append(beam.at(0))
+                self.set_annotations(subbatch, parse_states)
+            yield from batch
 
     def parse_batch(self, docs):
         cdef:
@@ -437,38 +443,22 @@ cdef class Parser:
         cdef np.ndarray token_ids = numpy.zeros((nr_state, nr_feat), dtype='i')
         cdef np.ndarray is_valid = numpy.zeros((nr_state, nr_class), dtype='i')
         cdef np.ndarray scores
+        cdef np.ndarray hidden_weights = numpy.ascontiguousarray(vec2scores._layers[-1].W.T)
+        cdef np.ndarray hidden_bias = vec2scores._layers[-1].b
+
+        hW = <float*>hidden_weights.data
+        hb = <float*>hidden_bias.data
+        cdef int nr_hidden = hidden_weights.shape[0]
         c_token_ids = <int*>token_ids.data
         c_is_valid = <int*>is_valid.data
         cdef int has_hidden = not getattr(vec2scores, 'is_noop', False)
         cdef int nr_step
         while not next_step.empty():
             nr_step = next_step.size()
-            if not has_hidden:
-                for i in cython.parallel.prange(nr_step, num_threads=6,
-                                                nogil=True):
-                    self._parse_step(next_step[i],
-                        feat_weights, nr_class, nr_feat, nr_piece)
-            else:
-                hists = []
-                for i in range(nr_step):
-                    st = next_step[i]
-                    st.set_context_tokens(&c_token_ids[i*nr_feat], nr_feat)
-                    self.moves.set_valid(&c_is_valid[i*nr_class], st)
-                    hists.append([st.get_hist(j+1) for j in range(8)])
-                hists = numpy.asarray(hists)
-                vectors = state2vec(token_ids[:next_step.size()])
-                if self.cfg.get('hist_size'):
-                    scores = vec2scores((vectors, hists))
-                else:
-                    scores = vec2scores(vectors)
-                c_scores = <float*>scores.data
-                for i in range(nr_step):
-                    st = next_step[i]
-                    guess = arg_max_if_valid(
-                        &c_scores[i*nr_class], &c_is_valid[i*nr_class], nr_class)
-                    action = self.moves.c[guess]
-                    action.do(st, action.label)
-                    st.push_hist(guess)
+            for i in cython.parallel.prange(nr_step, num_threads=3,
+                                            nogil=True):
+                self._parse_step(next_step[i],
+                    feat_weights, hW, hb, nr_class, nr_hidden, nr_feat, nr_piece)
             this_step, next_step = next_step, this_step
             next_step.clear()
             for st in this_step:
@@ -528,24 +518,33 @@ cdef class Parser:
         return beams
 
     cdef void _parse_step(self, StateC* state,
-            const float* feat_weights,
-            int nr_class, int nr_feat, int nr_piece) nogil:
+            const float* feat_weights, const float* hW, const float* hb,
+            int nr_class, int nr_hidden, int nr_feat, int nr_piece) nogil:
        '''This only works with no hidden layers -- fast but inaccurate'''
        token_ids = <int*>calloc(nr_feat, sizeof(int))
-        scores = <float*>calloc(nr_class * nr_piece, sizeof(float))
+        vector = <float*>calloc(nr_hidden * nr_piece, sizeof(float))
+        scores = <float*>calloc(nr_class, sizeof(float))
        is_valid = <int*>calloc(nr_class, sizeof(int))
 
        state.set_context_tokens(token_ids, nr_feat)
-        sum_state_features(scores,
-            feat_weights, token_ids, 1, nr_feat, nr_class * nr_piece)
+        sum_state_features(vector,
+            feat_weights, token_ids, 1, nr_feat, nr_hidden * nr_piece)
+        for i in range(nr_hidden):
+            feature = Vec.max(&vector[i*nr_piece], nr_piece)
+            for j in range(nr_class):
+                scores[j] += feature * hW[j]
+            hW += nr_class
+        for i in range(nr_class):
+            scores[i] += hb[i]
        self.moves.set_valid(is_valid, state)
-        guess = arg_maxout_if_valid(scores, is_valid, nr_class, nr_piece)
+        guess = arg_max_if_valid(scores, is_valid, nr_class)
        action = self.moves.c[guess]
        action.do(state, action.label)
        state.push_hist(guess)
 
        free(is_valid)
        free(scores)
+        free(vector)
        free(token_ids)
 
    def update(self, docs, golds, drop=0., sgd=None, losses=None):
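The batching strategy in this first patch is easier to see without the surrounding Cython. Below is a minimal Python sketch of the new Parser.pipe control flow; pipe_batches and its arguments are illustrative stand-ins rather than spaCy API, and the only assumption is that parse_batch annotates the Docs it receives in place:

    import cytoolz

    def pipe_batches(parse_batch, docs, batch_size=1000, subbatch_size=32):
        for batch in cytoolz.partition_all(batch_size, docs):
            batch = list(batch)
            # Group documents of similar length so each sub-batch spends
            # less compute on length variance within the batch.
            by_length = sorted(batch, key=len)
            for subbatch in cytoolz.partition_all(subbatch_size, by_length):
                parse_batch(list(subbatch))  # annotates Docs in place
            # Yield in the caller's original order, not the sorted order.
            yield from batch

Patch 5 below keeps this shape and shrinks the sub-batch size from 32 to 8.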
From 65bf5e85bdab144a034864450628fff969b51c05 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Wed, 18 Oct 2017 21:46:12 +0200
Subject: [PATCH 2/9] Improve piping in language.pipe

---
 spacy/language.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/spacy/language.py b/spacy/language.py
index abfc1a064..c706e532a 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -10,6 +10,7 @@ from collections import OrderedDict
 import itertools
 import weakref
 import functools
+import tqdm
 
 from .tokenizer import Tokenizer
 from .vocab import Vocab
@@ -447,11 +448,9 @@ class Language(object):
         golds = list(golds)
         for name, pipe in self.pipeline:
             if not hasattr(pipe, 'pipe'):
-                for doc in docs:
-                    pipe(doc)
+                docs = (pipe(doc) for doc in docs)
             else:
-                docs = list(pipe.pipe(docs))
-                assert len(docs) == len(golds)
+                docs = pipe.pipe(docs, batch_size=256)
         for doc, gold in zip(docs, golds):
             if verbose:
                 print(doc)
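The evaluate() change above swaps an eager loop for generator composition: a component without a .pipe() method is wrapped in a generator expression, so no stage materialises the whole corpus and annotation only happens when the final zip is iterated. A sketch of that pattern in isolation (apply_pipeline is a hypothetical stand-in for the loop inside Language.evaluate):

    def apply_pipeline(pipeline, docs):
        for name, pipe in pipeline:
            if not hasattr(pipe, 'pipe'):
                # Lazy: processes one Doc at a time, on iteration.
                docs = (pipe(doc) for doc in docs)
            else:
                # Components with .pipe() manage their own batching.
                docs = pipe.pipe(docs, batch_size=256)
        return docs  # still a generator; work happens during iteration

Note the dropped assert: with generators there is no len() to check, so length agreement is implicitly deferred to the zip over docs and golds.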
From 79fcf8576aa077749c91315d102c0d70d888ca60 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Wed, 18 Oct 2017 21:46:34 +0200
Subject: [PATCH 3/9] Compile with march=native

---
 setup.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 23b4f9581..2e2b816b7 100755
--- a/setup.py
+++ b/setup.py
@@ -53,7 +53,8 @@ MOD_NAMES = [
 COMPILE_OPTIONS = {
     'msvc': ['/Ox', '/EHsc'],
     'mingw32' : ['-O3', '-Wno-strict-prototypes', '-Wno-unused-function'],
-    'other' : ['-O3', '-Wno-strict-prototypes', '-Wno-unused-function']
+    'other' : ['-O3', '-Wno-strict-prototypes', '-Wno-unused-function',
+               '-march=native']
 }


From f018f2030ccbc1871732020ff42cb2ebb2277a84 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Wed, 18 Oct 2017 21:48:00 +0200
Subject: [PATCH 4/9] Try optimized parser forward loop

---
 spacy/syntax/nn_parser.pxd         | 4 ++--
 spacy/syntax/nn_parser.pyx         | 2 --
 spacy/syntax/transition_system.pyx | 3 ++-
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/spacy/syntax/nn_parser.pxd b/spacy/syntax/nn_parser.pxd
index b0b7693b7..fd1d4c9be 100644
--- a/spacy/syntax/nn_parser.pxd
+++ b/spacy/syntax/nn_parser.pxd
@@ -16,7 +16,7 @@ cdef class Parser:
     cdef public object _multitasks
 
     cdef void _parse_step(self, StateC* state,
-            const float* feat_weights,
-            int nr_class, int nr_feat, int nr_piece) nogil
+            const float* feat_weights, const float* hW, const float* hb,
+            int nr_class, int nr_hidden, int nr_feat, int nr_piece) nogil
 
     #cdef int parseC(self, TokenC* tokens, int length, int nr_feat) nogil

diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx
index f8e1baf35..4846f326e 100644
--- a/spacy/syntax/nn_parser.pyx
+++ b/spacy/syntax/nn_parser.pyx
@@ -289,8 +289,6 @@ cdef class Parser:
             zero_init(Affine(nr_class, hidden_width, drop_factor=0.0))
         )
         upper.is_noop = False
-        print(upper._layers)
-        print(upper._layers[0]._layers)
 
         # TODO: This is an unfortunate hack atm!
         # Used to set input dimensions in network.

diff --git a/spacy/syntax/transition_system.pyx b/spacy/syntax/transition_system.pyx
index 055129c8b..922fdf97c 100644
--- a/spacy/syntax/transition_system.pyx
+++ b/spacy/syntax/transition_system.pyx
@@ -148,7 +148,8 @@ cdef class TransitionSystem:
 
     def add_action(self, int action, label_name):
         cdef attr_t label_id
-        if not isinstance(label_name, (int, long)):
+        if not isinstance(label_name, int) and \
+           not isinstance(label_name, long):
             label_id = self.strings.add(label_name)
         else:
             label_id = label_name
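Both the _parse_step signature above and the _parseC routine introduced next lean on sum_state_features, which sums precomputed hidden-layer contributions for a state's context tokens. A toy NumPy restatement, under the assumption of a dense cache with one row per (feature slot, token id) pair; the real C layout is flatter, but the arithmetic is the same:

    import numpy

    def sum_state_features(cached, token_ids):
        # cached: shape (nr_feat, vocab_size, nr_hidden * nr_piece)
        # token_ids: the nr_feat context token ids for one state
        out = numpy.zeros(cached.shape[-1], dtype='f')
        for slot, token in enumerate(token_ids):
            out += cached[slot, token]
        return out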
From bbfd7d8d5de70249a949161b7cfee5f21274965d Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Thu, 19 Oct 2017 00:25:21 +0200
Subject: [PATCH 5/9] Clean up parser multi-threading

---
 spacy/syntax/nn_parser.pxd |   4 +-
 spacy/syntax/nn_parser.pyx | 111 +++++++++++++++++--------------------
 2 files changed, 53 insertions(+), 62 deletions(-)

diff --git a/spacy/syntax/nn_parser.pxd b/spacy/syntax/nn_parser.pxd
index fd1d4c9be..1d389609b 100644
--- a/spacy/syntax/nn_parser.pxd
+++ b/spacy/syntax/nn_parser.pxd
@@ -15,8 +15,6 @@ cdef class Parser:
     cdef readonly object cfg
     cdef public object _multitasks
 
-    cdef void _parse_step(self, StateC* state,
+    cdef void _parseC(self, StateC* state,
             const float* feat_weights, const float* hW, const float* hb,
             int nr_class, int nr_hidden, int nr_feat, int nr_piece) nogil
-
-    #cdef int parseC(self, TokenC* tokens, int length, int nr_feat) nogil

diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx
index 4846f326e..fbd950292 100644
--- a/spacy/syntax/nn_parser.pyx
+++ b/spacy/syntax/nn_parser.pyx
@@ -395,7 +395,7 @@ cdef class Parser:
         for batch in cytoolz.partition_all(batch_size, docs):
             batch = list(batch)
             by_length = sorted(list(batch), key=lambda doc: len(doc))
-            for subbatch in cytoolz.partition_all(32, by_length):
+            for subbatch in cytoolz.partition_all(8, by_length):
                 subbatch = list(subbatch)
                 if beam_width == 1:
                     parse_states = self.parse_batch(subbatch)
@@ -412,57 +412,80 @@ cdef class Parser:
     def parse_batch(self, docs):
         cdef:
             precompute_hiddens state2vec
-            StateClass state
+            StateClass stcls
             Pool mem
             const float* feat_weights
             StateC* st
-            vector[StateC*] next_step, this_step
-            int nr_class, nr_feat, nr_piece, nr_dim, nr_state
+            vector[StateC*] states
+            int guess, nr_class, nr_feat, nr_piece, nr_dim, nr_state, nr_step
+            int j
         if isinstance(docs, Doc):
             docs = [docs]
         cuda_stream = get_cuda_stream()
         (tokvecs, bp_tokvecs), state2vec, vec2scores = self.get_batch_model(docs, cuda_stream,
                                                                             0.0)
-        nr_state = len(docs)
         nr_class = self.moves.n_moves
         nr_dim = tokvecs.shape[1]
         nr_feat = self.nr_feature
         nr_piece = state2vec.nP
-        states = self.moves.init_batch(docs)
-        for state in states:
-            if not state.c.is_final():
-                next_step.push_back(state.c)
-
+        state_objs = self.moves.init_batch(docs)
+        for stcls in state_objs:
+            if not stcls.c.is_final():
+                states.push_back(stcls.c)
         feat_weights = state2vec.get_feat_weights()
         cdef int i
         cdef np.ndarray hidden_weights = numpy.ascontiguousarray(vec2scores._layers[-1].W.T)
         cdef np.ndarray hidden_bias = vec2scores._layers[-1].b
 
         hW = <float*>hidden_weights.data
         hb = <float*>hidden_bias.data
         cdef int nr_hidden = hidden_weights.shape[0]
-        c_token_ids = <int*>token_ids.data
-        c_is_valid = <int*>is_valid.data
-        cdef int has_hidden = not getattr(vec2scores, 'is_noop', False)
-        cdef int nr_step
-        while not next_step.empty():
-            nr_step = next_step.size()
-            for i in cython.parallel.prange(nr_step, num_threads=3,
-                                            nogil=True):
-                self._parse_step(next_step[i],
-                    feat_weights, hW, hb, nr_class, nr_hidden, nr_feat, nr_piece)
-            this_step, next_step = next_step, this_step
-            next_step.clear()
-            for st in this_step:
-                if not st.is_final():
-                    next_step.push_back(st)
-        return states
+
+        with nogil:
+            for i in cython.parallel.prange(states.size(), num_threads=2,
+                                            schedule='guided'):
+                self._parseC(states[i],
+                    feat_weights, hW, hb,
+                    nr_class, nr_hidden, nr_feat, nr_piece)
+        return state_objs
+
+    cdef void _parseC(self, StateC* state,
+            const float* feat_weights, const float* hW, const float* hb,
+            int nr_class, int nr_hidden, int nr_feat, int nr_piece) nogil:
+        token_ids = <int*>calloc(nr_feat, sizeof(int))
+        is_valid = <int*>calloc(nr_class, sizeof(int))
+        vectors = <float*>calloc(nr_hidden * nr_piece, sizeof(float))
+        scores = <float*>calloc(nr_class, sizeof(float))
+
+        while not state.is_final():
+            state.set_context_tokens(token_ids, nr_feat)
+            memset(vectors, 0, nr_hidden * nr_piece * sizeof(float))
+            memset(scores, 0, nr_class * sizeof(float))
+            sum_state_features(vectors,
+                feat_weights, token_ids, 1, nr_feat, nr_hidden * nr_piece)
+            V = vectors
+            W = hW
+            for i in range(nr_hidden):
+                feature = V[0] if V[0] >= V[1] else V[1]
+                for j in range(nr_class):
+                    scores[j] += feature * W[j]
+                W += nr_class
+                V += nr_piece
+            for i in range(nr_class):
+                scores[i] += hb[i]
+            self.moves.set_valid(is_valid, state)
+            guess = arg_max_if_valid(scores, is_valid, nr_class)
+            action = self.moves.c[guess]
+            action.do(state, action.label)
+            state.push_hist(guess)
+        free(token_ids)
+        free(is_valid)
+        free(vectors)
+        free(scores)
 
     def beam_parse(self, docs, int beam_width=3, float beam_density=0.001):
         cdef Beam beam
@@ -515,36 +538,6 @@ cdef class Parser:
             beams.append(beam)
         return beams
 
-    cdef void _parse_step(self, StateC* state,
-            const float* feat_weights, const float* hW, const float* hb,
-            int nr_class, int nr_hidden, int nr_feat, int nr_piece) nogil:
-        '''This only works with no hidden layers -- fast but inaccurate'''
-        token_ids = <int*>calloc(nr_feat, sizeof(int))
-        vector = <float*>calloc(nr_hidden * nr_piece, sizeof(float))
-        scores = <float*>calloc(nr_class, sizeof(float))
-        is_valid = <int*>calloc(nr_class, sizeof(int))
-
-        state.set_context_tokens(token_ids, nr_feat)
-        sum_state_features(vector,
-            feat_weights, token_ids, 1, nr_feat, nr_hidden * nr_piece)
-        for i in range(nr_hidden):
-            feature = Vec.max(&vector[i*nr_piece], nr_piece)
-            for j in range(nr_class):
-                scores[j] += feature * hW[j]
-            hW += nr_class
-        for i in range(nr_class):
-            scores[i] += hb[i]
-        self.moves.set_valid(is_valid, state)
-        guess = arg_max_if_valid(scores, is_valid, nr_class)
-        action = self.moves.c[guess]
-        action.do(state, action.label)
-        state.push_hist(guess)
-
-        free(is_valid)
-        free(scores)
-        free(vector)
-        free(token_ids)
 
     def update(self, docs, golds, drop=0., sgd=None, losses=None):
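Per state, the scoring loop inside _parseC is a maxout over nr_piece candidate activations for each hidden unit, followed by an affine output layer. In NumPy terms, with hW mirroring hidden_weights above (the output layer's W transposed to shape (nr_hidden, nr_class)) and hb its bias, a sketch of the same computation:

    import numpy

    def score_actions(vector, hW, hb, nr_piece=2):
        # vector: summed feature contributions, length nr_hidden * nr_piece
        hidden = vector.reshape(-1, nr_piece).max(axis=1)  # maxout
        return hidden @ hW + hb                            # output layer

The branchy V[0] if V[0] >= V[1] else V[1] in the C loop is that max over pieces, unrolled for the nr_piece == 2 that the next patch hard-codes.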
From 960788aaa2681ba5c33ceac257f46d7a6389f949 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Thu, 19 Oct 2017 00:42:34 +0200
Subject: [PATCH 6/9] Eliminate dead code in parser, and raise errors for
 obsolete options

---
 spacy/syntax/nn_parser.pyx | 79 ++++++++------------------------------
 1 file changed, 16 insertions(+), 63 deletions(-)

diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx
index fbd950292..f5c0454bc 100644
--- a/spacy/syntax/nn_parser.pyx
+++ b/spacy/syntax/nn_parser.pyx
@@ -241,54 +241,32 @@ cdef class Parser:
     @classmethod
     def Model(cls, nr_class, **cfg):
         depth = util.env_opt('parser_hidden_depth', cfg.get('hidden_depth', 1))
+        if depth != 1:
+            raise ValueError("Currently parser depth is hard-coded to 1.")
+        parser_maxout_pieces = util.env_opt('parser_maxout_pieces',
+                                            cfg.get('maxout_pieces', 2))
+        if parser_maxout_pieces != 2:
+            raise ValueError("Currently parser_maxout_pieces is hard-coded to 2")
         token_vector_width = util.env_opt('token_vector_width',
                                           cfg.get('token_vector_width', 128))
         hidden_width = util.env_opt('hidden_width', cfg.get('hidden_width', 200))
-        parser_maxout_pieces = util.env_opt('parser_maxout_pieces',
-                                            cfg.get('maxout_pieces', 2))
         embed_size = util.env_opt('embed_size', cfg.get('embed_size', 7000))
         hist_size = util.env_opt('history_feats', cfg.get('hist_size', 0))
         hist_width = util.env_opt('history_width', cfg.get('hist_width', 0))
-        if hist_size >= 1 and depth == 0:
-            raise ValueError("Inconsistent hyper-params: "
-                "history_feats >= 1 but parser_hidden_depth==0")
+        if hist_size != 0:
+            raise ValueError("Currently history size is hard-coded to 0")
+        if hist_width != 0:
+            raise ValueError("Currently history width is hard-coded to 0")
         tok2vec = Tok2Vec(token_vector_width, embed_size,
                           pretrained_dims=cfg.get('pretrained_dims', 0))
         tok2vec = chain(tok2vec, flatten)
-        if parser_maxout_pieces == 1:
-            lower = PrecomputableAffine(hidden_width if depth >= 1 else nr_class,
-                        nF=cls.nr_feature,
-                        nI=token_vector_width)
-        else:
-            lower = PrecomputableMaxouts(hidden_width if depth >= 1 else nr_class,
-                        nF=cls.nr_feature,
-                        nP=parser_maxout_pieces,
-                        nI=token_vector_width)
+        lower = PrecomputableMaxouts(hidden_width if depth >= 1 else nr_class,
+                    nF=cls.nr_feature, nP=parser_maxout_pieces,
+                    nI=token_vector_width)
 
         with Model.use_device('cpu'):
-            if depth == 0:
-                upper = chain()
-                upper.is_noop = True
-            elif hist_size and depth == 1:
-                upper = chain(
-                    HistoryFeatures(nr_class=nr_class, hist_size=hist_size,
-                                    nr_dim=hist_width),
-                    zero_init(Affine(nr_class, hidden_width+hist_size*hist_width,
-                        drop_factor=0.0)))
-                upper.is_noop = False
-            elif hist_size:
-                upper = chain(
-                    HistoryFeatures(nr_class=nr_class, hist_size=hist_size,
-                                    nr_dim=hist_width),
-                    LayerNorm(Maxout(hidden_width, hidden_width+hist_size*hist_width)),
-                    clone(LayerNorm(Maxout(hidden_width, hidden_width)), depth-2),
-                    zero_init(Affine(nr_class, hidden_width, drop_factor=0.0))
-                )
-                upper.is_noop = False
-            else:
-                upper = chain(
-                    clone(LayerNorm(Maxout(hidden_width, hidden_width)), depth-1),
-                    zero_init(Affine(nr_class, hidden_width, drop_factor=0.0))
-                )
-                upper.is_noop = False
+            upper = chain(
+                clone(LayerNorm(Maxout(hidden_width, hidden_width)), depth-1),
+                zero_init(Affine(nr_class, hidden_width, drop_factor=0.0))
+            )
 
         # TODO: This is an unfortunate hack atm!
         # Used to set input dimensions in network.
@@ -959,31 +937,6 @@ cdef int arg_max_if_valid(const weight_t* scores, const int* is_valid, int n) no
     return best
 
 
-cdef int arg_maxout_if_valid(const weight_t* scores, const int* is_valid,
-                             int n, int nP) nogil:
-    cdef int best = -1
-    cdef float best_score = 0
-    for i in range(n):
-        if is_valid[i] >= 1:
-            for j in range(nP):
-                if best == -1 or scores[i*nP+j] > best_score:
-                    best = i
-                    best_score = scores[i*nP+j]
-    return best
-
-
-cdef int _arg_max_clas(const weight_t* scores, int move, const Transition* actions,
-                       int nr_class) except -1:
-    cdef weight_t score = 0
-    cdef int mode = -1
-    cdef int i
-    for i in range(nr_class):
-        if actions[i].move == move and (mode == -1 or scores[i] >= score):
-            mode = i
-            score = scores[i]
-    return mode
-
-
 # These are passed as callbacks to thinc.search.Beam
 cdef int _transition_state(void* _dest, void* _src, class_t clas, void* _moves) except -1:
     dest = <StateC*>_dest
From d4cfff0476bbef90acfba037d805a1b21449f5d7 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Thu, 19 Oct 2017 00:47:24 +0200
Subject: [PATCH 7/9] Comment out currently hard-coded hyper-params

---
 website/api/_top-level/_cli.jade | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/website/api/_top-level/_cli.jade b/website/api/_top-level/_cli.jade
index b2a9c574d..fc573e0ec 100644
--- a/website/api/_top-level/_cli.jade
+++ b/website/api/_top-level/_cli.jade
@@ -315,30 +315,30 @@ p
                 +cell Number of rows in embedding tables.
                 +cell #[code 7500]
 
+            //- +row
+            //-     +cell #[code parser_maxout_pieces]
+            //-     +cell Number of pieces in the parser's and NER's first maxout layer.
+            //-     +cell #[code 2]
 
+            //- +row
+            //-     +cell #[code parser_hidden_depth]
+            //-     +cell Number of hidden layers in the parser and NER.
+            //-     +cell #[code 1]
 
             +row
                 +cell #[code hidden_width]
                 +cell Size of the parser's and NER's hidden layers.
                 +cell #[code 128]
 
+            //- +row
+            //-     +cell #[code history_feats]
+            //-     +cell Number of previous action ID features for parser and NER.
+            //-     +cell #[code 128]
 
+            //- +row
+            //-     +cell #[code history_width]
+            //-     +cell Number of embedding dimensions for each action ID.
+            //-     +cell #[code 128]
 
             +row
                 +cell #[code learn_rate]
From 906c50ac5997c33feb0ecb82d32faacf8b0b50e0 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Thu, 19 Oct 2017 01:48:39 +0200
Subject: [PATCH 8/9] Fix loop typing that caused an error on Windows

---
 spacy/syntax/nn_parser.pyx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx
index f5c0454bc..f79837fae 100644
--- a/spacy/syntax/nn_parser.pyx
+++ b/spacy/syntax/nn_parser.pyx
@@ -422,9 +422,9 @@ cdef class Parser:
         hW = <float*>hidden_weights.data
         hb = <float*>hidden_bias.data
         cdef int nr_hidden = hidden_weights.shape[0]
-
+        cdef int nr_task = states.size()
         with nogil:
-            for i in cython.parallel.prange(states.size(), num_threads=2,
+            for i in cython.parallel.prange(nr_task, num_threads=2,
                                             schedule='guided'):
                 self._parseC(states[i],
                     feat_weights, hW, hb,
                     nr_class, nr_hidden, nr_feat, nr_piece)

From 15e5a04a8d9a1be29844332295eea85bedae12fc Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Thu, 19 Oct 2017 01:48:43 +0200
Subject: [PATCH 9/9] Clean up more depth=0 conditional code

---
 spacy/syntax/nn_parser.pyx | 21 ++++++---------------
 1 file changed, 6 insertions(+), 15 deletions(-)

diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx
index f79837fae..cb26b8d37 100644
--- a/spacy/syntax/nn_parser.pyx
+++ b/spacy/syntax/nn_parser.pyx
@@ -38,7 +38,7 @@ from murmurhash.mrmr cimport hash64
 from preshed.maps cimport MapStruct
 from preshed.maps cimport map_get
 
-from thinc.api import layerize, chain, noop, clone, with_flatten
+from thinc.api import layerize, chain, clone, with_flatten
 from thinc.v2v import Model, Maxout, Softmax, Affine, ReLu, SELU
 from thinc.misc import LayerNorm
 
@@ -768,20 +768,11 @@ cdef class Parser:
         if self.model not in (True, False, None) and resized:
             # Weights are stored in (nr_out, nr_in) format, so we're basically
             # just adding rows here.
-            if self.model[-1].is_noop:
-                smaller = self.model[1]
-                dims = dict(self.model[1]._dims)
-                dims['nO'] = self.moves.n_moves
-                larger = self.model[1].__class__(**dims)
-                copy_array(larger.W[:, :smaller.nO], smaller.W)
-                copy_array(larger.b[:smaller.nO], smaller.b)
-                self.model = (self.model[0], larger, self.model[2])
-            else:
-                smaller = self.model[-1]._layers[-1]
-                larger = Affine(self.moves.n_moves, smaller.nI)
-                copy_array(larger.W[:smaller.nO], smaller.W)
-                copy_array(larger.b[:smaller.nO], smaller.b)
-                self.model[-1]._layers[-1] = larger
+            smaller = self.model[-1]._layers[-1]
+            larger = Affine(self.moves.n_moves, smaller.nI)
+            copy_array(larger.W[:smaller.nO], smaller.W)
+            copy_array(larger.b[:smaller.nO], smaller.b)
+            self.model[-1]._layers[-1] = larger
 
     def begin_training(self, gold_tuples, pipeline=None, **cfg):
         if 'model' in cfg:
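The loop-typing fix in patch 8 deserves a note: states.size() returns the unsigned size_t, and with an unsigned bound the generated OpenMP loop falls foul of MSVC, whose OpenMP 2.0 front-end requires a signed index variable (error C3016). Copying the bound into a signed int first keeps the generated loop signed. A self-contained Cython sketch of the pattern, using placeholder data rather than spaCy's structures (build with OpenMP enabled, e.g. -fopenmp, or /openmp for MSVC):

    # distutils: language = c++
    cimport cython
    from libcpp.vector cimport vector

    def parallel_sum(values):
        cdef vector[double] xs = values
        cdef double total = 0.0
        cdef int i
        # Signed copy of the unsigned xs.size(), mirroring nr_task above,
        # so the OpenMP loop compiles under MSVC as well as GCC/Clang.
        cdef int n = xs.size()
        with nogil:
            for i in cython.parallel.prange(n, num_threads=2,
                                            schedule='guided'):
                total += xs[i]  # prange treats this as a reduction
        return total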