diff --git a/spacy/syntax/_parse_features.pxd b/spacy/syntax/_parse_features.pxd
deleted file mode 100644
index 0842e3504..000000000
--- a/spacy/syntax/_parse_features.pxd
+++ /dev/null
@@ -1,259 +0,0 @@
-from thinc.typedefs cimport atom_t
-
-from .stateclass cimport StateClass
-from ._state cimport StateC
-
-
-cdef int fill_context(atom_t* context, const StateC* state) nogil
-# Context elements
-
-# Ensure each token's attributes are listed: w, p, c, c6, c4. The order
-# is referenced by incrementing the enum...
-
-# Tokens are listed in left-to-right order.
-#cdef size_t* SLOTS = [
-#    S2w, S1w,
-#    S0l0w, S0l2w, S0lw,
-#    S0w,
-#    S0r0w, S0r2w, S0rw,
-#    N0l0w, N0l2w, N0lw,
-#    P2w, P1w,
-#    N0w, N1w, N2w, N3w, 0
-#]
-
-# NB: The order of the enum is _NOT_ arbitrary!!
-cpdef enum:
-    S2w
-    S2W
-    S2p
-    S2c
-    S2c4
-    S2c6
-    S2L
-    S2_prefix
-    S2_suffix
-    S2_shape
-    S2_ne_iob
-    S2_ne_type
-
-    S1w
-    S1W
-    S1p
-    S1c
-    S1c4
-    S1c6
-    S1L
-    S1_prefix
-    S1_suffix
-    S1_shape
-    S1_ne_iob
-    S1_ne_type
-
-    S1rw
-    S1rW
-    S1rp
-    S1rc
-    S1rc4
-    S1rc6
-    S1rL
-    S1r_prefix
-    S1r_suffix
-    S1r_shape
-    S1r_ne_iob
-    S1r_ne_type
-
-    S0lw
-    S0lW
-    S0lp
-    S0lc
-    S0lc4
-    S0lc6
-    S0lL
-    S0l_prefix
-    S0l_suffix
-    S0l_shape
-    S0l_ne_iob
-    S0l_ne_type
-
-    S0l2w
-    S0l2W
-    S0l2p
-    S0l2c
-    S0l2c4
-    S0l2c6
-    S0l2L
-    S0l2_prefix
-    S0l2_suffix
-    S0l2_shape
-    S0l2_ne_iob
-    S0l2_ne_type
-
-    S0w
-    S0W
-    S0p
-    S0c
-    S0c4
-    S0c6
-    S0L
-    S0_prefix
-    S0_suffix
-    S0_shape
-    S0_ne_iob
-    S0_ne_type
-
-    S0r2w
-    S0r2W
-    S0r2p
-    S0r2c
-    S0r2c4
-    S0r2c6
-    S0r2L
-    S0r2_prefix
-    S0r2_suffix
-    S0r2_shape
-    S0r2_ne_iob
-    S0r2_ne_type
-
-    S0rw
-    S0rW
-    S0rp
-    S0rc
-    S0rc4
-    S0rc6
-    S0rL
-    S0r_prefix
-    S0r_suffix
-    S0r_shape
-    S0r_ne_iob
-    S0r_ne_type
-
-    N0l2w
-    N0l2W
-    N0l2p
-    N0l2c
-    N0l2c4
-    N0l2c6
-    N0l2L
-    N0l2_prefix
-    N0l2_suffix
-    N0l2_shape
-    N0l2_ne_iob
-    N0l2_ne_type
-
-    N0lw
-    N0lW
-    N0lp
-    N0lc
-    N0lc4
-    N0lc6
-    N0lL
-    N0l_prefix
-    N0l_suffix
-    N0l_shape
-    N0l_ne_iob
-    N0l_ne_type
-
-    N0w
-    N0W
-    N0p
-    N0c
-    N0c4
-    N0c6
-    N0L
-    N0_prefix
-    N0_suffix
-    N0_shape
-    N0_ne_iob
-    N0_ne_type
-
-    N1w
-    N1W
-    N1p
-    N1c
-    N1c4
-    N1c6
-    N1L
-    N1_prefix
-    N1_suffix
-    N1_shape
-    N1_ne_iob
-    N1_ne_type
-
-    N2w
-    N2W
-    N2p
-    N2c
-    N2c4
-    N2c6
-    N2L
-    N2_prefix
-    N2_suffix
-    N2_shape
-    N2_ne_iob
-    N2_ne_type
-
-    P1w
-    P1W
-    P1p
-    P1c
-    P1c4
-    P1c6
-    P1L
-    P1_prefix
-    P1_suffix
-    P1_shape
-    P1_ne_iob
-    P1_ne_type
-
-    P2w
-    P2W
-    P2p
-    P2c
-    P2c4
-    P2c6
-    P2L
-    P2_prefix
-    P2_suffix
-    P2_shape
-    P2_ne_iob
-    P2_ne_type
-
-    E0w
-    E0W
-    E0p
-    E0c
-    E0c4
-    E0c6
-    E0L
-    E0_prefix
-    E0_suffix
-    E0_shape
-    E0_ne_iob
-    E0_ne_type
-
-    E1w
-    E1W
-    E1p
-    E1c
-    E1c4
-    E1c6
-    E1L
-    E1_prefix
-    E1_suffix
-    E1_shape
-    E1_ne_iob
-    E1_ne_type
-
-    # Misc features at the end
-    dist
-    N0lv
-    S0lv
-    S0rv
-    S1lv
-    S1rv
-
-    S0_has_head
-    S1_has_head
-    S2_has_head
-
-    CONTEXT_SIZE
diff --git a/spacy/syntax/_parse_features.pyx b/spacy/syntax/_parse_features.pyx
deleted file mode 100644
index 2e0db4877..000000000
--- a/spacy/syntax/_parse_features.pyx
+++ /dev/null
@@ -1,419 +0,0 @@
-"""
-Fill an array, context, with every _atomic_ value our features reference.
-We then write the _actual features_ as tuples of the atoms. The machinery
-that translates from the tuples to feature-extractors (which pick the values
-out of "context") is in features/extractor.pyx
-
-The atomic feature names are listed in a big enum, so that the feature tuples
-can refer to them.
-"""
-# coding: utf-8
-from __future__ import unicode_literals
-
-from libc.string cimport memset
-from itertools import combinations
-from cymem.cymem cimport Pool
-
-from ..structs cimport TokenC
-from .stateclass cimport StateClass
-from ._state cimport StateC
-
-
-cdef inline void fill_token(atom_t* context, const TokenC* token) nogil:
-    if token is NULL:
-        context[0] = 0
-        context[1] = 0
-        context[2] = 0
-        context[3] = 0
-        context[4] = 0
-        context[5] = 0
-        context[6] = 0
-        context[7] = 0
-        context[8] = 0
-        context[9] = 0
-        context[10] = 0
-        context[11] = 0
-    else:
-        context[0] = token.lex.orth
-        context[1] = token.lemma
-        context[2] = token.tag
-        context[3] = token.lex.cluster
-        # We've read in the string little-endian, so now we can take & (2**n)-1
-        # to get the first n bits of the cluster.
-        # e.g. s = "1110010101"
-        # s = ''.join(reversed(s))
-        # first_4_bits = int(s, 2)
-        # print first_4_bits
-        # 5
-        # print "{0:b}".format(prefix).ljust(4, '0')
-        # 1110
-        # What we're doing here is picking a number where all bits are 1, e.g.
-        # 15 is 1111, 63 is 111111 and doing bitwise AND, so getting all bits in
-        # the source that are set to 1.
-        context[4] = token.lex.cluster & 15
-        context[5] = token.lex.cluster & 63
-        context[6] = token.dep if token.head != 0 else 0
-        context[7] = token.lex.prefix
-        context[8] = token.lex.suffix
-        context[9] = token.lex.shape
-        context[10] = token.ent_iob
-        context[11] = token.ent_type
-
-cdef int fill_context(atom_t* ctxt, const StateC* st) nogil:
-    # Take care to fill every element of context!
-    # We could memset, but this makes it very easy to have broken features that
-    # make almost no impact on accuracy. If instead they're unset, the impact
-    # tends to be dramatic, so we get an obvious regression to fix...
-    fill_token(&ctxt[S2w], st.S_(2))
-    fill_token(&ctxt[S1w], st.S_(1))
-    fill_token(&ctxt[S1rw], st.R_(st.S(1), 1))
-    fill_token(&ctxt[S0lw], st.L_(st.S(0), 1))
-    fill_token(&ctxt[S0l2w], st.L_(st.S(0), 2))
-    fill_token(&ctxt[S0w], st.S_(0))
-    fill_token(&ctxt[S0r2w], st.R_(st.S(0), 2))
-    fill_token(&ctxt[S0rw], st.R_(st.S(0), 1))
-    fill_token(&ctxt[N0lw], st.L_(st.B(0), 1))
-    fill_token(&ctxt[N0l2w], st.L_(st.B(0), 2))
-    fill_token(&ctxt[N0w], st.B_(0))
-    fill_token(&ctxt[N1w], st.B_(1))
-    fill_token(&ctxt[N2w], st.B_(2))
-    fill_token(&ctxt[P1w], st.safe_get(st.B(0)-1))
-    fill_token(&ctxt[P2w], st.safe_get(st.B(0)-2))
-
-    fill_token(&ctxt[E0w], st.E_(0))
-    fill_token(&ctxt[E1w], st.E_(1))
-
-    if st.stack_depth() >= 1 and not st.eol():
-        ctxt[dist] = min_(st.B(0) - st.E(0), 5)
-    else:
-        ctxt[dist] = 0
-    ctxt[N0lv] = min_(st.n_L(st.B(0)), 5)
-    ctxt[S0lv] = min_(st.n_L(st.S(0)), 5)
-    ctxt[S0rv] = min_(st.n_R(st.S(0)), 5)
-    ctxt[S1lv] = min_(st.n_L(st.S(1)), 5)
-    ctxt[S1rv] = min_(st.n_R(st.S(1)), 5)
-
-    ctxt[S0_has_head] = 0
-    ctxt[S1_has_head] = 0
-    ctxt[S2_has_head] = 0
-    if st.stack_depth() >= 1:
-        ctxt[S0_has_head] = st.has_head(st.S(0)) + 1
-        if st.stack_depth() >= 2:
-            ctxt[S1_has_head] = st.has_head(st.S(1)) + 1
-            if st.stack_depth() >= 3:
-                ctxt[S2_has_head] = st.has_head(st.S(2)) + 1
-
-
-cdef inline int min_(int a, int b) nogil:
-    return a if a > b else b
-
-
-ner = (
-    (N0W,),
-    (P1W,),
-    (N1W,),
-    (P2W,),
-    (N2W,),
-
-    (P1W, N0W,),
-    (N0W, N1W),
-
-    (N0_prefix,),
-    (N0_suffix,),
-
-    (P1_shape,),
-    (N0_shape,),
-    (N1_shape,),
-    (P1_shape, N0_shape,),
-    (N0_shape, P1_shape,),
-    (P1_shape, N0_shape, N1_shape),
-    (N2_shape,),
-    (P2_shape,),
-
-    #(P2_norm, P1_norm, W_norm),
-    #(P1_norm, W_norm, N1_norm),
-    #(W_norm, N1_norm, N2_norm)
-
-    (P2p,),
-    (P1p,),
-    (N0p,),
-    (N1p,),
-    (N2p,),
-
-    (P1p, N0p),
-    (N0p, N1p),
-    (P2p, P1p, N0p),
-    (P1p, N0p, N1p),
-    (N0p, N1p, N2p),
-
-    (P2c,),
-    (P1c,),
-    (N0c,),
-    (N1c,),
-    (N2c,),
-
-    (P1c, N0c),
-    (N0c, N1c),
-
-    (E0W,),
-    (E0c,),
-    (E0p,),
-
-    (E0W, N0W),
-    (E0c, N0W),
-    (E0p, N0W),
-
-    (E0p, P1p, N0p),
-    (E0c, P1c, N0c),
-
-    (E0w, P1c),
-    (E0p, P1p),
-    (E0c, P1c),
-    (E0p, E1p),
-    (E0c, P1p),
-
-    (E1W,),
-    (E1c,),
-    (E1p,),
-
-    (E0W, E1W),
-    (E0W, E1p,),
-    (E0p, E1W,),
-    (E0p, E1W),
-
-    (P1_ne_iob,),
-    (P1_ne_iob, P1_ne_type),
-    (N0w, P1_ne_iob, P1_ne_type),
-
-    (N0_shape,),
-    (N1_shape,),
-    (N2_shape,),
-    (P1_shape,),
-    (P2_shape,),
-
-    (N0_prefix,),
-    (N0_suffix,),
-
-    (P1_ne_iob,),
-    (P2_ne_iob,),
-    (P1_ne_iob, P2_ne_iob),
-    (P1_ne_iob, P1_ne_type),
-    (P2_ne_iob, P2_ne_type),
-    (N0w, P1_ne_iob, P1_ne_type),
-
-    (N0w, N1w),
-)
-
-
-unigrams = (
-    (S2W, S2p),
-    (S2c6, S2p),
-
-    (S1W, S1p),
-    (S1c6, S1p),
-
-    (S0W, S0p),
-    (S0c6, S0p),
-
-    (N0W, N0p),
-    (N0p,),
-    (N0c,),
-    (N0c6, N0p),
-    (N0L,),
-
-    (N1W, N1p),
-    (N1c6, N1p),
-
-    (N2W, N2p),
-    (N2c6, N2p),
-
-    (S0r2W, S0r2p),
-    (S0r2c6, S0r2p),
-    (S0r2L,),
-
-    (S0rW, S0rp),
-    (S0rc6, S0rp),
-    (S0rL,),
-
-    (S0l2W, S0l2p),
-    (S0l2c6, S0l2p),
-    (S0l2L,),
-
-    (S0lW, S0lp),
-    (S0lc6, S0lp),
-    (S0lL,),
-
-    (N0l2W, N0l2p),
-    (N0l2c6, N0l2p),
-    (N0l2L,),
-
-    (N0lW, N0lp),
-    (N0lc6, N0lp),
-    (N0lL,),
-)
-
-
-s0_n0 = (
-    (S0W, S0p, N0W, N0p),
-    (S0c, S0p, N0c, N0p),
-    (S0c6, S0p, N0c6, N0p),
-    (S0c4, S0p, N0c4, N0p),
-    (S0p, N0p),
-    (S0W, N0p),
-    (S0p, N0W),
-    (S0W, N0c),
-    (S0c, N0W),
-    (S0p, N0c),
-    (S0c, N0p),
-    (S0W, S0rp, N0p),
-    (S0p, S0rp, N0p),
-    (S0p, N0lp, N0W),
-    (S0p, N0lp, N0p),
-    (S0L, N0p),
-    (S0p, S0rL, N0p),
-    (S0p, N0lL, N0p),
-    (S0p, S0rv, N0p),
-    (S0p, N0lv, N0p),
-    (S0c6, S0rL, S0r2L, N0p),
-    (S0p, N0lL, N0l2L, N0p),
-)
-
-
-s1_s0 = (
-    (S1p, S0p),
-    (S1p, S0p, S0_has_head),
-    (S1W, S0p),
-    (S1W, S0p, S0_has_head),
-    (S1c, S0p),
-    (S1c, S0p, S0_has_head),
-    (S1p, S1rL, S0p),
-    (S1p, S1rL, S0p, S0_has_head),
-    (S1p, S0lL, S0p),
-    (S1p, S0lL, S0p, S0_has_head),
-    (S1p, S0lL, S0l2L, S0p),
-    (S1p, S0lL, S0l2L, S0p, S0_has_head),
-    (S1L, S0L, S0W),
-    (S1L, S0L, S0p),
-    (S1p, S1L, S0L, S0p),
-    (S1p, S0p),
-)
-
-
-s1_n0 = (
-    (S1p, N0p),
-    (S1c, N0c),
-    (S1c, N0p),
-    (S1p, N0c),
-    (S1W, S1p, N0p),
-    (S1p, N0W, N0p),
-    (S1c6, S1p, N0c6, N0p),
-    (S1L, N0p),
-    (S1p, S1rL, N0p),
-    (S1p, S1rp, N0p),
-)
-
-
-s0_n1 = (
-    (S0p, N1p),
-    (S0c, N1c),
-    (S0c, N1p),
-    (S0p, N1c),
-    (S0W, S0p, N1p),
-    (S0p, N1W, N1p),
-    (S0c6, S0p, N1c6, N1p),
-    (S0L, N1p),
-    (S0p, S0rL, N1p),
-)
-
-
-n0_n1 = (
-    (N0W, N0p, N1W, N1p),
-    (N0W, N0p, N1p),
-    (N0p, N1W, N1p),
-    (N0c, N0p, N1c, N1p),
-    (N0c6, N0p, N1c6, N1p),
-    (N0c, N1c),
-    (N0p, N1c),
-)
-
-tree_shape = (
-    (dist,),
-    (S0p, S0_has_head, S1_has_head, S2_has_head),
-    (S0p, S0lv, S0rv),
-    (N0p, N0lv),
-)
-
-trigrams = (
-    (N0p, N1p, N2p),
-    (S0p, S0lp, S0l2p),
-    (S0p, S0rp, S0r2p),
-    (S0p, S1p, S2p),
-    (S1p, S0p, N0p),
-    (S0p, S0lp, N0p),
-    (S0p, N0p, N0lp),
-    (N0p, N0lp, N0l2p),
-
-    (S0W, S0p, S0rL, S0r2L),
-    (S0p, S0rL, S0r2L),
-
-    (S0W, S0p, S0lL, S0l2L),
-    (S0p, S0lL, S0l2L),
-
-    (N0W, N0p, N0lL, N0l2L),
-    (N0p, N0lL, N0l2L),
-)
-
-
-words = (
-    S2w,
-    S1w,
-    S1rw,
-    S0lw,
-    S0l2w,
-    S0w,
-    S0r2w,
-    S0rw,
-    N0lw,
-    N0l2w,
-    N0w,
-    N1w,
-    N2w,
-    P1w,
-    P2w
-)
-
-tags = (
-    S2p,
-    S1p,
-    S1rp,
-    S0lp,
-    S0l2p,
-    S0p,
-    S0r2p,
-    S0rp,
-    N0lp,
-    N0l2p,
-    N0p,
-    N1p,
-    N2p,
-    P1p,
-    P2p
-)
-
-labels = (
-    S2L,
-    S1L,
-    S1rL,
-    S0lL,
-    S0l2L,
-    S0L,
-    S0r2L,
-    S0rL,
-    N0lL,
-    N0l2L,
-    N0L,
-    N1L,
-    N2L,
-    P1L,
-    P2L
-)
diff --git a/spacy/syntax/beam_parser.pxd b/spacy/syntax/beam_parser.pxd
deleted file mode 100644
index 35a60cbf3..000000000
--- a/spacy/syntax/beam_parser.pxd
+++ /dev/null
@@ -1,10 +0,0 @@
-from .parser cimport Parser
-from ..structs cimport TokenC
-from thinc.typedefs cimport weight_t
-
-
-cdef class BeamParser(Parser):
-    cdef public int beam_width
-    cdef public weight_t beam_density
-
-    cdef int _parseC(self, TokenC* tokens, int length, int nr_feat, int nr_class) except -1
diff --git a/spacy/syntax/beam_parser.pyx b/spacy/syntax/beam_parser.pyx
deleted file mode 100644
index 68e9f27af..000000000
--- a/spacy/syntax/beam_parser.pyx
+++ /dev/null
@@ -1,239 +0,0 @@
-"""
-MALT-style dependency parser
-"""
-# cython: profile=True
-# cython: experimental_cpp_class_def=True
-# cython: cdivision=True
-# cython: infer_types=True
-# coding: utf-8
-
-from __future__ import unicode_literals, print_function
-cimport cython
-
-from cpython.ref cimport PyObject, Py_INCREF, Py_XDECREF
-from libc.stdint cimport uint32_t, uint64_t
-from libc.string cimport memset, memcpy
-from libc.stdlib cimport rand
-from libc.math cimport log, exp, isnan, isinf
-from cymem.cymem cimport Pool, Address
-from murmurhash.mrmr cimport real_hash64 as hash64
-from thinc.typedefs cimport weight_t, class_t, feat_t, atom_t, hash_t
-from thinc.linear.features cimport ConjunctionExtracter
-from thinc.structs cimport FeatureC, ExampleC
-from thinc.extra.search cimport Beam, MaxViolation
-from thinc.extra.eg cimport Example
-from thinc.extra.mb cimport Minibatch
-
-from ..structs cimport TokenC
-from ..tokens.doc cimport Doc
-from ..strings cimport StringStore
-from .transition_system cimport TransitionSystem, Transition
-from ..gold cimport GoldParse
-from . import _parse_features
-from ._parse_features cimport CONTEXT_SIZE
-from ._parse_features cimport fill_context
-from .stateclass cimport StateClass
-from .parser cimport Parser
-
-
-DEBUG = False
-def set_debug(val):
-    global DEBUG
-    DEBUG = val
-
-
-def get_templates(name):
-    pf = _parse_features
-    if name == 'ner':
-        return pf.ner
-    elif name == 'debug':
-        return pf.unigrams
-    else:
-        return (pf.unigrams + pf.s0_n0 + pf.s1_n0 + pf.s1_s0 + pf.s0_n1 + pf.n0_n1 + \
-                pf.tree_shape + pf.trigrams)
-
-
-cdef int BEAM_WIDTH = 16
-cdef weight_t BEAM_DENSITY = 0.001
-
-cdef class BeamParser(Parser):
-    def __init__(self, *args, **kwargs):
-        self.beam_width = kwargs.get('beam_width', BEAM_WIDTH)
-        self.beam_density = kwargs.get('beam_density', BEAM_DENSITY)
-        Parser.__init__(self, *args, **kwargs)
-
-    cdef int parseC(self, TokenC* tokens, int length, int nr_feat) nogil:
-        with gil:
-            self._parseC(tokens, length, nr_feat, self.moves.n_moves)
-
-    cdef int _parseC(self, TokenC* tokens, int length, int nr_feat, int nr_class) except -1:
-        cdef Beam beam = Beam(self.moves.n_moves, self.beam_width, min_density=self.beam_density)
-        # TODO: How do we handle new labels here? This increases nr_class
-        beam.initialize(self.moves.init_beam_state, length, tokens)
-        beam.check_done(_check_final_state, NULL)
-        if beam.is_done:
-            _cleanup(beam)
-            return 0
-        while not beam.is_done:
-            self._advance_beam(beam, None, False)
-        state = <StateClass>beam.at(0)
-        self.moves.finalize_state(state.c)
-        for i in range(length):
-            tokens[i] = state.c._sent[i]
-        _cleanup(beam)
-
-    def update(self, Doc tokens, GoldParse gold_parse, itn=0):
-        self.moves.preprocess_gold(gold_parse)
-        cdef Beam pred = Beam(self.moves.n_moves, self.beam_width)
-        pred.initialize(self.moves.init_beam_state, tokens.length, tokens.c)
-        pred.check_done(_check_final_state, NULL)
-        # Hack for NER
-        for i in range(pred.size):
-            stcls = <StateClass>pred.at(i)
-            self.moves.initialize_state(stcls.c)
-
-        cdef Beam gold = Beam(self.moves.n_moves, self.beam_width, min_density=0.0)
-        gold.initialize(self.moves.init_beam_state, tokens.length, tokens.c)
-        gold.check_done(_check_final_state, NULL)
-        violn = MaxViolation()
-        while not pred.is_done and not gold.is_done:
-            # We search separately here, to allow for ambiguity in the gold parse.
-            self._advance_beam(pred, gold_parse, False)
-            self._advance_beam(gold, gold_parse, True)
-            violn.check_crf(pred, gold)
-            if pred.loss > 0 and pred.min_score > (gold.score + self.model.time):
-                break
-        else:
-            # The non-monotonic oracle makes it difficult to ensure final costs are
-            # correct. Therefore do final correction
-            for i in range(pred.size):
-                if self.moves.is_gold_parse(<StateClass>pred.at(i), gold_parse):
-                    pred._states[i].loss = 0.0
-                elif pred._states[i].loss == 0.0:
-                    pred._states[i].loss = 1.0
-            violn.check_crf(pred, gold)
-        if pred.size < 1:
-            raise Exception("No candidates", tokens.length)
-        if gold.size < 1:
-            raise Exception("No gold", tokens.length)
-        if pred.loss == 0:
-            self.model.update_from_histories(self.moves, tokens, [(0.0, [])])
-        elif True:
-            #_check_train_integrity(pred, gold, gold_parse, self.moves)
-            histories = list(zip(violn.p_probs, violn.p_hist)) + \
-                        list(zip(violn.g_probs, violn.g_hist))
-            self.model.update_from_histories(self.moves, tokens, histories, min_grad=0.001**(itn+1))
-        else:
-            self.model.update_from_histories(self.moves, tokens,
-                [(1.0, violn.p_hist[0]), (-1.0, violn.g_hist[0])])
-        _cleanup(pred)
-        _cleanup(gold)
-        return pred.loss
-
-    def _advance_beam(self, Beam beam, GoldParse gold, bint follow_gold):
-        cdef atom_t[CONTEXT_SIZE] context
-        cdef Pool mem = Pool()
-        features = <FeatureC*>mem.alloc(self.model.nr_feat, sizeof(FeatureC))
-        if False:
-            mb = Minibatch(self.model.widths, beam.size)
-            for i in range(beam.size):
-                stcls = <StateClass>beam.at(i)
-                if stcls.c.is_final():
-                    nr_feat = 0
-                else:
-                    nr_feat = self.model.set_featuresC(context, features, stcls.c)
-                    self.moves.set_valid(beam.is_valid[i], stcls.c)
-                mb.c.push_back(features, nr_feat, beam.costs[i], beam.is_valid[i], 0)
-            self.model(mb)
-            for i in range(beam.size):
-                memcpy(beam.scores[i], mb.c.scores(i), mb.c.nr_out() * sizeof(beam.scores[i][0]))
-        else:
-            for i in range(beam.size):
-                stcls = <StateClass>beam.at(i)
-                if not stcls.is_final():
-                    nr_feat = self.model.set_featuresC(context, features, stcls.c)
-                    self.moves.set_valid(beam.is_valid[i], stcls.c)
-                    self.model.set_scoresC(beam.scores[i], features, nr_feat)
-        if gold is not None:
-            n_gold = 0
-            lines = []
-            for i in range(beam.size):
-                stcls = <StateClass>beam.at(i)
-                if not stcls.c.is_final():
-                    self.moves.set_costs(beam.is_valid[i], beam.costs[i], stcls, gold)
-                    if follow_gold:
-                        for j in range(self.moves.n_moves):
-                            if beam.costs[i][j] >= 1:
-                                beam.is_valid[i][j] = 0
-                                lines.append((stcls.B(0), stcls.B(1),
-                                    stcls.B_(0).ent_iob, stcls.B_(1).ent_iob,
-                                    stcls.B_(1).sent_start,
-                                    j,
-                                    beam.is_valid[i][j], 'set invalid',
-                                    beam.costs[i][j], self.moves.c[j].move, self.moves.c[j].label))
-                            n_gold += 1 if beam.is_valid[i][j] else 0
-            if follow_gold and n_gold == 0:
-                raise Exception("No gold")
-        if follow_gold:
-            beam.advance(_transition_state, NULL, <void*>self.moves.c)
-        else:
-            beam.advance(_transition_state, _hash_state, <void*>self.moves.c)
-        beam.check_done(_check_final_state, NULL)
-
-
-# These are passed as callbacks to thinc.search.Beam
-cdef int _transition_state(void* _dest, void* _src, class_t clas, void* _moves) except -1:
-    dest = <StateClass>_dest
-    src = <StateClass>_src
-    moves = <const Transition*>_moves
-    dest.clone(src)
-    moves[clas].do(dest.c, moves[clas].label)
-
-
-cdef int _check_final_state(void* _state, void* extra_args) except -1:
-    return (<StateClass>_state).is_final()
-
-
-def _cleanup(Beam beam):
-    for i in range(beam.width):
-        Py_XDECREF(<PyObject*>beam._states[i].content)
-        Py_XDECREF(<PyObject*>beam._parents[i].content)
-
-
-cdef hash_t _hash_state(void* _state, void* _) except 0:
-    state = <StateClass>_state
-    if state.c.is_final():
-        return 1
-    else:
-        return state.c.hash()
-
-
-def _check_train_integrity(Beam pred, Beam gold, GoldParse gold_parse, TransitionSystem moves):
-    for i in range(pred.size):
-        if not pred._states[i].is_done or pred._states[i].loss == 0:
-            continue
-        state = <StateClass>pred.at(i)
-        if moves.is_gold_parse(state, gold_parse) == True:
-            for dep in gold_parse.orig_annot:
-                print(dep[1], dep[3], dep[4])
-            print("Cost", pred._states[i].loss)
-            for j in range(gold_parse.length):
-                print(gold_parse.orig_annot[j][1], state.H(j), moves.strings[state.safe_get(j).dep])
-            acts = [moves.c[clas].move for clas in pred.histories[i]]
-            labels = [moves.c[clas].label for clas in pred.histories[i]]
-            print([moves.move_name(move, label) for move, label in zip(acts, labels)])
-            raise Exception("Predicted state is gold-standard")
-    for i in range(gold.size):
-        if not gold._states[i].is_done:
-            continue
-        state = <StateClass>gold.at(i)
-        if moves.is_gold(state, gold_parse) == False:
-            print("Truth")
-            for dep in gold_parse.orig_annot:
-                print(dep[1], dep[3], dep[4])
-            print("Predicted good")
-            for j in range(gold_parse.length):
-                print(gold_parse.orig_annot[j][1], state.H(j), moves.strings[state.safe_get(j).dep])
-            raise Exception("Gold parse is not gold-standard")
-
-
diff --git a/spacy/syntax/parser.pxd b/spacy/syntax/parser.pxd
deleted file mode 100644
index 95b6c3d3f..000000000
--- a/spacy/syntax/parser.pxd
+++ /dev/null
@@ -1,24 +0,0 @@
-from thinc.linear.avgtron cimport AveragedPerceptron
-from thinc.typedefs cimport atom_t
-from thinc.structs cimport FeatureC
-
-from .stateclass cimport StateClass
-from .arc_eager cimport TransitionSystem
-from ..vocab cimport Vocab
-from ..tokens.doc cimport Doc
-from ..structs cimport TokenC
-from ._state cimport StateC
-
-
-cdef class ParserModel(AveragedPerceptron):
-    cdef int set_featuresC(self, atom_t* context, FeatureC* features,
-                            const StateC* state) nogil
-
-
-cdef class Parser:
-    cdef readonly Vocab vocab
-    cdef readonly ParserModel model
-    cdef readonly TransitionSystem moves
-    cdef readonly object cfg
-
-    cdef int parseC(self, TokenC* tokens, int length, int nr_feat) nogil
diff --git a/spacy/syntax/parser.pyx b/spacy/syntax/parser.pyx
deleted file mode 100644
index 78698db12..000000000
--- a/spacy/syntax/parser.pyx
+++ /dev/null
@@ -1,526 +0,0 @@
-"""
-MALT-style dependency parser
-"""
-# coding: utf-8
-# cython: infer_types=True
-from __future__ import unicode_literals
-
-from collections import Counter
-import ujson
-
-cimport cython
-cimport cython.parallel
-
-import numpy.random
-
-from cpython.ref cimport PyObject, Py_INCREF, Py_XDECREF
-from cpython.exc cimport PyErr_CheckSignals
-from libc.stdint cimport uint32_t, uint64_t
-from libc.string cimport memset, memcpy
-from libc.stdlib cimport malloc, calloc, free
-from thinc.typedefs cimport weight_t, class_t, feat_t, atom_t, hash_t
-from thinc.linear.avgtron cimport AveragedPerceptron
-from thinc.linalg cimport VecVec
-from thinc.structs cimport SparseArrayC, FeatureC, ExampleC
-from thinc.extra.eg cimport Example
-from cymem.cymem cimport Pool, Address
-from murmurhash.mrmr cimport hash64
-from preshed.maps cimport MapStruct
-from preshed.maps cimport map_get
-
-from . import _parse_features
-from ._parse_features cimport CONTEXT_SIZE
-from ._parse_features cimport fill_context
-from .stateclass cimport StateClass
-from ._state cimport StateC
-from .transition_system import OracleError
-from .transition_system cimport TransitionSystem, Transition
-from ..structs cimport TokenC
-from ..tokens.doc cimport Doc
-from ..strings cimport StringStore
-from ..gold cimport GoldParse
-
-
-USE_FTRL = True
-DEBUG = False
-def set_debug(val):
-    global DEBUG
-    DEBUG = val
-
-
-def get_templates(name):
-    pf = _parse_features
-    if name == 'ner':
-        return pf.ner
-    elif name == 'debug':
-        return pf.unigrams
-    elif name.startswith('embed'):
-        return (pf.words, pf.tags, pf.labels)
-    else:
-        return (pf.unigrams + pf.s0_n0 + pf.s1_n0 + pf.s1_s0 + pf.s0_n1 + pf.n0_n1 + \
-                pf.tree_shape + pf.trigrams)
-
-
-cdef class ParserModel(AveragedPerceptron):
-    cdef int set_featuresC(self, atom_t* context, FeatureC* features,
-            const StateC* state) nogil:
-        fill_context(context, state)
-        nr_feat = self.extracter.set_features(features, context)
-        return nr_feat
-
-    def update(self, Example eg, itn=0):
-        """
-        Does regression on negative cost. Sort of cute?
-        """
-        self.time += 1
-        cdef int best = arg_max_if_gold(eg.c.scores, eg.c.costs, eg.c.nr_class)
-        cdef int guess = eg.guess
-        if guess == best or best == -1:
-            return 0.0
-        cdef FeatureC feat
-        cdef int clas
-        cdef weight_t gradient
-        if USE_FTRL:
-            for feat in eg.c.features[:eg.c.nr_feat]:
-                for clas in range(eg.c.nr_class):
-                    if eg.c.is_valid[clas] and eg.c.scores[clas] >= eg.c.scores[best]:
-                        gradient = eg.c.scores[clas] + eg.c.costs[clas]
-                        self.update_weight_ftrl(feat.key, clas, feat.value * gradient)
-        else:
-            for feat in eg.c.features[:eg.c.nr_feat]:
-                self.update_weight(feat.key, guess, feat.value * eg.c.costs[guess])
-                self.update_weight(feat.key, best, -feat.value * eg.c.costs[guess])
-        return eg.c.costs[guess]
-
-    def update_from_histories(self, TransitionSystem moves, Doc doc, histories, weight_t min_grad=0.0):
-        cdef Pool mem = Pool()
-        features = <FeatureC*>mem.alloc(self.nr_feat, sizeof(FeatureC))
-
-        cdef StateClass stcls
-
-        cdef class_t clas
-        self.time += 1
-        cdef atom_t[CONTEXT_SIZE] atoms
-        histories = [(grad, hist) for grad, hist in histories if abs(grad) >= min_grad and hist]
-        if not histories:
-            return None
-        gradient = [Counter() for _ in range(max([max(h)+1 for _, h in histories]))]
-        for d_loss, history in histories:
-            stcls = StateClass.init(doc.c, doc.length)
-            moves.initialize_state(stcls.c)
-            for clas in history:
-                nr_feat = self.set_featuresC(atoms, features, stcls.c)
-                clas_grad = gradient[clas]
-                for feat in features[:nr_feat]:
-                    clas_grad[feat.key] += d_loss * feat.value
-                moves.c[clas].do(stcls.c, moves.c[clas].label)
-        cdef feat_t key
-        cdef weight_t d_feat
-        for clas, clas_grad in enumerate(gradient):
-            for key, d_feat in clas_grad.items():
-                if d_feat != 0:
-                    self.update_weight_ftrl(key, clas, d_feat)
-
-
-cdef class Parser:
-    """
-    Base class of the DependencyParser and EntityRecognizer.
-    """
-    @classmethod
-    def load(cls, path, Vocab vocab, TransitionSystem=None, require=False, **cfg):
-        """
-        Load the statistical model from the supplied path.
-
-        Arguments:
-            path (Path):
-                The path to load from.
-            vocab (Vocab):
-                The vocabulary. Must be shared by the documents to be processed.
-            require (bool):
-                Whether to raise an error if the files are not found.
-        Returns (Parser):
-            The newly constructed object.
-        """
-        with (path / 'config.json').open() as file_:
-            cfg = ujson.load(file_)
-        # TODO: remove this shim when we don't have to support older data
-        if 'labels' in cfg and 'actions' not in cfg:
-            cfg['actions'] = cfg.pop('labels')
-        # TODO: remove this shim when we don't have to support older data
-        for action_name, labels in dict(cfg.get('actions', {})).items():
-            # We need this to be sorted
-            if isinstance(labels, dict):
-                labels = list(sorted(labels.keys()))
-            cfg['actions'][action_name] = labels
-        self = cls(vocab, TransitionSystem=TransitionSystem, model=None, **cfg)
-        if (path / 'model').exists():
-            self.model.load(str(path / 'model'))
-        elif require:
-            raise IOError(
-                "Required file %s/model not found when loading" % str(path))
-        return self
-
-    def __init__(self, Vocab vocab, TransitionSystem=None, ParserModel model=None, **cfg):
-        """
-        Create a Parser.
-
-        Arguments:
-            vocab (Vocab):
-                The vocabulary object. Must be shared with documents to be processed.
-            model (thinc.linear.AveragedPerceptron):
-                The statistical model.
-        Returns (Parser):
-            The newly constructed object.
-        """
-        if TransitionSystem is None:
-            TransitionSystem = self.TransitionSystem
-        self.vocab = vocab
-        cfg['actions'] = TransitionSystem.get_actions(**cfg)
-        self.moves = TransitionSystem(vocab.strings, cfg['actions'])
-        # TODO: Remove this when we no longer need to support old-style models
-        if isinstance(cfg.get('features'), basestring):
-            cfg['features'] = get_templates(cfg['features'])
-        elif 'features' not in cfg:
-            cfg['features'] = self.feature_templates
-
-        self.model = ParserModel(cfg['features'])
-        self.model.l1_penalty = cfg.get('L1', 0.0)
-        self.model.learn_rate = cfg.get('learn_rate', 0.001)
-
-        self.cfg = cfg
-        # TODO: This is a pretty hacky fix to the problem of adding more
-        # labels. The issue is they come in out of order, if labels are
-        # added during training
-        for label in cfg.get('extra_labels', []):
-            self.add_label(label)
-
-    def __reduce__(self):
-        return (Parser, (self.vocab, self.moves, self.model), None, None)
-
-    def __call__(self, Doc tokens):
-        """
-        Apply the entity recognizer, setting the annotations onto the Doc object.
-
-        Arguments:
-            doc (Doc): The document to be processed.
-        Returns:
-            None
-        """
-        cdef int nr_feat = self.model.nr_feat
-        with nogil:
-            status = self.parseC(tokens.c, tokens.length, nr_feat)
-        # Check for KeyboardInterrupt etc. Untested
-        PyErr_CheckSignals()
-        if status != 0:
-            raise ParserStateError(tokens)
-        self.moves.finalize_doc(tokens)
-
-    def pipe(self, stream, int batch_size=1000, int n_threads=2):
-        """
-        Process a stream of documents.
-
-        Arguments:
-            stream: The sequence of documents to process.
-            batch_size (int):
-                The number of documents to accumulate into a working set.
-            n_threads (int):
-                The number of threads with which to work on the buffer in parallel.
-        Yields (Doc): Documents, in order.
-        """
-        cdef Pool mem = Pool()
-        cdef TokenC** doc_ptr = <TokenC**>mem.alloc(batch_size, sizeof(TokenC*))
-        cdef int* lengths = <int*>mem.alloc(batch_size, sizeof(int))
-        cdef Doc doc
-        cdef int i
-        cdef int nr_feat = self.model.nr_feat
-        cdef int status
-        queue = []
-        for doc in stream:
-            doc_ptr[len(queue)] = doc.c
-            lengths[len(queue)] = doc.length
-            queue.append(doc)
-            if len(queue) == batch_size:
-                with nogil:
-                    for i in cython.parallel.prange(batch_size, num_threads=n_threads):
-                        status = self.parseC(doc_ptr[i], lengths[i], nr_feat)
-                        if status != 0:
-                            with gil:
-                                raise ParserStateError(queue[i])
-                PyErr_CheckSignals()
-                for doc in queue:
-                    self.moves.finalize_doc(doc)
-                    yield doc
-                queue = []
-        batch_size = len(queue)
-        with nogil:
-            for i in cython.parallel.prange(batch_size, num_threads=n_threads):
-                status = self.parseC(doc_ptr[i], lengths[i], nr_feat)
-                if status != 0:
-                    with gil:
-                        raise ParserStateError(queue[i])
-        PyErr_CheckSignals()
-        for doc in queue:
-            self.moves.finalize_doc(doc)
-            yield doc
-
-    cdef int parseC(self, TokenC* tokens, int length, int nr_feat) nogil:
-        state = new StateC(tokens, length)
-        # NB: This can change self.moves.n_moves!
-        # I think this causes memory errors if called by .pipe()
-        self.moves.initialize_state(state)
-        nr_class = self.moves.n_moves
-
-        cdef ExampleC eg
-        eg.nr_feat = nr_feat
-        eg.nr_atom = CONTEXT_SIZE
-        eg.nr_class = nr_class
-        eg.features = <FeatureC*>calloc(sizeof(FeatureC), nr_feat)
-        eg.atoms = <atom_t*>calloc(sizeof(atom_t), CONTEXT_SIZE)
-        eg.scores = <weight_t*>calloc(sizeof(weight_t), nr_class)
-        eg.is_valid = <int*>calloc(sizeof(int), nr_class)
-        cdef int i
-        while not state.is_final():
-            eg.nr_feat = self.model.set_featuresC(eg.atoms, eg.features, state)
-            self.moves.set_valid(eg.is_valid, state)
-            self.model.set_scoresC(eg.scores, eg.features, eg.nr_feat)
-
-            guess = VecVec.arg_max_if_true(eg.scores, eg.is_valid, eg.nr_class)
-            if guess < 0:
-                return 1
-
-            action = self.moves.c[guess]
-
-            action.do(state, action.label)
-            memset(eg.scores, 0, sizeof(eg.scores[0]) * eg.nr_class)
-            for i in range(eg.nr_class):
-                eg.is_valid[i] = 1
-        self.moves.finalize_state(state)
-        for i in range(length):
-            tokens[i] = state._sent[i]
-        del state
-        free(eg.features)
-        free(eg.atoms)
-        free(eg.scores)
-        free(eg.is_valid)
-        return 0
-
-    def update(self, Doc tokens, GoldParse gold, itn=0, double drop=0.0):
-        """
-        Update the statistical model.
-
-        Arguments:
-            doc (Doc):
-                The example document for the update.
-            gold (GoldParse):
-                The gold-standard annotations, to calculate the loss.
-        Returns (float):
-            The loss on this example.
-        """
-        self.moves.preprocess_gold(gold)
-        cdef StateClass stcls = StateClass.init(tokens.c, tokens.length)
-        self.moves.initialize_state(stcls.c)
-        cdef Pool mem = Pool()
-        cdef Example eg = Example(
-                nr_class=self.moves.n_moves,
-                nr_atom=CONTEXT_SIZE,
-                nr_feat=self.model.nr_feat)
-        cdef weight_t loss = 0
-        cdef Transition action
-        cdef double dropout_rate = self.cfg.get('dropout', drop)
-        while not stcls.is_final():
-            eg.c.nr_feat = self.model.set_featuresC(eg.c.atoms, eg.c.features,
-                                                    stcls.c)
-            dropout(eg.c.features, eg.c.nr_feat, dropout_rate)
-            self.moves.set_costs(eg.c.is_valid, eg.c.costs, stcls, gold)
-            self.model.set_scoresC(eg.c.scores, eg.c.features, eg.c.nr_feat)
-            guess = VecVec.arg_max_if_true(eg.c.scores, eg.c.is_valid, eg.c.nr_class)
-            self.model.update(eg)
-
-            action = self.moves.c[guess]
-            action.do(stcls.c, action.label)
-            loss += eg.costs[guess]
-            eg.fill_scores(0, eg.c.nr_class)
-            eg.fill_costs(0, eg.c.nr_class)
-            eg.fill_is_valid(1, eg.c.nr_class)
-
-        self.moves.finalize_state(stcls.c)
-        return loss
-
-    def step_through(self, Doc doc, GoldParse gold=None):
-        """
-        Set up a stepwise state, to introspect and control the transition sequence.
-
-        Arguments:
-            doc (Doc): The document to step through.
-            gold (GoldParse): Optional gold parse
-        Returns (StepwiseState):
-            A state object, to step through the annotation process.
-        """
-        return StepwiseState(self, doc, gold=gold)
-
-    def from_transition_sequence(self, Doc doc, sequence):
-        """Control the annotations on a document by specifying a transition sequence
-        to follow.
-
-        Arguments:
-            doc (Doc): The document to annotate.
-            sequence: A sequence of action names, as unicode strings.
-        Returns: None
-        """
-        with self.step_through(doc) as stepwise:
-            for transition in sequence:
-                stepwise.transition(transition)
-
-    def add_label(self, label):
-        # Doesn't set label into serializer -- subclasses override it to do that.
-        for action in self.moves.action_types:
-            added = self.moves.add_action(action, label)
-            if added:
-                # Important that the labels be stored as a list! We need the
-                # order, or the model goes out of synch
-                self.cfg.setdefault('extra_labels', []).append(label)
-
-
-cdef int dropout(FeatureC* feats, int nr_feat, float prob) except -1:
-    if prob <= 0 or prob >= 1.:
-        return 0
-    cdef double[::1] py_probs = numpy.random.uniform(0., 1., nr_feat)
-    cdef double* probs = &py_probs[0]
-    for i in range(nr_feat):
-        if probs[i] >= prob:
-            feats[i].value /= prob
-        else:
-            feats[i].value = 0.
-
-
-cdef class StepwiseState:
-    cdef readonly StateClass stcls
-    cdef readonly Example eg
-    cdef readonly Doc doc
-    cdef readonly GoldParse gold
-    cdef readonly Parser parser
-
-    def __init__(self, Parser parser, Doc doc, GoldParse gold=None):
-        self.parser = parser
-        self.doc = doc
-        if gold is not None:
-            self.gold = gold
-            self.parser.moves.preprocess_gold(self.gold)
-        else:
-            self.gold = GoldParse(doc)
-        self.stcls = StateClass.init(doc.c, doc.length)
-        self.parser.moves.initialize_state(self.stcls.c)
-        self.eg = Example(
-            nr_class=self.parser.moves.n_moves,
-            nr_atom=CONTEXT_SIZE,
-            nr_feat=self.parser.model.nr_feat)
-
-    def __enter__(self):
-        return self
-
-    def __exit__(self, type, value, traceback):
-        self.finish()
-
-    @property
-    def is_final(self):
-        return self.stcls.is_final()
-
-    @property
-    def stack(self):
-        return self.stcls.stack
-
-    @property
-    def queue(self):
-        return self.stcls.queue
-
-    @property
-    def heads(self):
-        return [self.stcls.H(i) for i in range(self.stcls.c.length)]
-
-    @property
-    def deps(self):
-        return [self.doc.vocab.strings[self.stcls.c._sent[i].dep]
-                for i in range(self.stcls.c.length)]
-
-    @property
-    def costs(self):
-        """
-        Find the action-costs for the current state.
-        """
-        if not self.gold:
-            raise ValueError("Can't set costs: No GoldParse provided")
-        self.parser.moves.set_costs(self.eg.c.is_valid, self.eg.c.costs,
-                self.stcls, self.gold)
-        costs = {}
-        for i in range(self.parser.moves.n_moves):
-            if not self.eg.c.is_valid[i]:
-                continue
-            transition = self.parser.moves.c[i]
-            name = self.parser.moves.move_name(transition.move, transition.label)
-            costs[name] = self.eg.c.costs[i]
-        return costs
-
-    def predict(self):
-        self.eg.reset()
-        self.eg.c.nr_feat = self.parser.model.set_featuresC(self.eg.c.atoms, self.eg.c.features,
-                                                            self.stcls.c)
-        self.parser.moves.set_valid(self.eg.c.is_valid, self.stcls.c)
-        self.parser.model.set_scoresC(self.eg.c.scores,
-            self.eg.c.features, self.eg.c.nr_feat)
-
-        cdef Transition action = self.parser.moves.c[self.eg.guess]
-        return self.parser.moves.move_name(action.move, action.label)
-
-    def transition(self, action_name=None):
-        if action_name is None:
-            action_name = self.predict()
-        moves = {'S': 0, 'D': 1, 'L': 2, 'R': 3}
-        if action_name == '_':
-            action_name = self.predict()
-            action = self.parser.moves.lookup_transition(action_name)
-        elif action_name == 'L' or action_name == 'R':
-            self.predict()
-            move = moves[action_name]
-            clas = _arg_max_clas(self.eg.c.scores, move, self.parser.moves.c,
-                                 self.eg.c.nr_class)
-            action = self.parser.moves.c[clas]
-        else:
-            action = self.parser.moves.lookup_transition(action_name)
-        action.do(self.stcls.c, action.label)
-
-    def finish(self):
-        if self.stcls.is_final():
-            self.parser.moves.finalize_state(self.stcls.c)
-        self.doc.set_parse(self.stcls.c._sent)
-        self.parser.moves.finalize_doc(self.doc)
-
-
-class ParserStateError(ValueError):
-    def __init__(self, doc):
-        ValueError.__init__(self,
-            "Error analysing doc -- no valid actions available. This should "
-            "never happen, so please report the error on the issue tracker. "
-            "Here's the thread to do so --- reopen it if it's closed:\n"
-            "https://github.com/spacy-io/spaCy/issues/429\n"
-            "Please include the text that the parser failed on, which is:\n"
-            "%s" % repr(doc.text))
-
-cdef int arg_max_if_gold(const weight_t* scores, const weight_t* costs, int n) nogil:
-    cdef int best = -1
-    for i in range(n):
-        if costs[i] <= 0:
-            if best == -1 or scores[i] > scores[best]:
-                best = i
-    return best
-
-
-cdef int _arg_max_clas(const weight_t* scores, int move, const Transition* actions,
-                       int nr_class) except -1:
-    cdef weight_t score = 0
-    cdef int mode = -1
-    cdef int i
-    for i in range(nr_class):
-        if actions[i].move == move and (mode == -1 or scores[i] >= score):
-            mode = i
-            score = scores[i]
-    return mode