diff --git a/setup.py b/setup.py
index 34123d92b..d063157cc 100644
--- a/setup.py
+++ b/setup.py
@@ -210,7 +210,6 @@ MOD_NAMES = ['spacy.parts_of_speech', 'spacy.strings',
              'spacy.lexeme', 'spacy.vocab', 'spacy.attrs',
              'spacy.morphology', 'spacy.tagger',
              'spacy.syntax.stateclass',
-             'spacy._ml',
              'spacy.tokenizer',
              'spacy.syntax.parser',
              'spacy.syntax.transition_system',
diff --git a/spacy/language.py b/spacy/language.py
index 3087a2373..f598518e2 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -1,3 +1,4 @@
+from __future__ import absolute_import
 from os import path
 from warnings import warn
 import io
@@ -13,7 +14,6 @@ from .syntax.parser import Parser
 from .tagger import Tagger
 from .matcher import Matcher
 from .serialize.packer import Packer
-from ._ml import Model
 from . import attrs
 from . import orth
 from .syntax.ner import BiluoPushDown
@@ -245,9 +245,12 @@ class Language(object):
     def end_training(self, data_dir=None):
         if data_dir is None:
             data_dir = self.data_dir
-        self.parser.model.end_training(path.join(data_dir, 'deps', 'model'))
-        self.entity.model.end_training(path.join(data_dir, 'ner', 'model'))
-        self.tagger.model.end_training(path.join(data_dir, 'pos', 'model'))
+        self.parser.model.end_training()
+        self.parser.model.dump(path.join(data_dir, 'deps', 'model'))
+        self.entity.model.end_training()
+        self.entity.model.dump(path.join(data_dir, 'ner', 'model'))
+        self.tagger.model.end_training()
+        self.tagger.model.dump(path.join(data_dir, 'pos', 'model'))
 
         strings_loc = path.join(data_dir, 'vocab', 'strings.json')
         with io.open(strings_loc, 'w', encoding='utf8') as file_:
diff --git a/spacy/strings.pyx b/spacy/strings.pyx
index b9264b915..ef8422aa0 100644
--- a/spacy/strings.pyx
+++ b/spacy/strings.pyx
@@ -78,7 +78,7 @@ cdef class StringStore:
     def __init__(self, strings=None):
        self.mem = Pool()
        self._map = PreshMap()
-        self._resize_at = 10
+        self._resize_at = 10000
        self.c = <Utf8Str*>self.mem.alloc(self._resize_at, sizeof(Utf8Str))
        self.size = 1
        if strings is not None:
diff --git a/spacy/syntax/parser.pxd b/spacy/syntax/parser.pxd
index 70a0229c2..33e57a2cb 100644
--- a/spacy/syntax/parser.pxd
+++ b/spacy/syntax/parser.pxd
@@ -1,18 +1,17 @@
 from thinc.search cimport Beam
+from thinc.api cimport AveragedPerceptron
+from thinc.api cimport Example, ExampleC
 
-from .._ml cimport Model
-
+from .stateclass cimport StateClass
 from .arc_eager cimport TransitionSystem
-
 from ..tokens.doc cimport Doc
 from ..structs cimport TokenC
-from thinc.api cimport Example, ExampleC
-from .stateclass cimport StateClass
+
+
+cdef class ParserModel(AveragedPerceptron):
+    cdef void set_features(self, ExampleC* eg, StateClass stcls) except *
 
 
 cdef class Parser:
-    cdef readonly Model model
+    cdef readonly ParserModel model
     cdef readonly TransitionSystem moves
-
-    cdef void parse(self, StateClass stcls, ExampleC eg) nogil
-    cdef void predict(self, StateClass stcls, ExampleC* eg) nogil
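Note on the language.py hunk above: persistence is now a two-step affair, with end_training() finalizing the averaged weights in memory and dump() writing them to disk. A minimal sketch of the resulting save flow (save_models is a hypothetical helper; the end_training/dump calls mirror the diff):

    from os import path

    def save_models(nlp, data_dir):
        # Hypothetical helper: finalize averaging once, then persist each model.
        for name, pipe in (('deps', nlp.parser), ('ner', nlp.entity), ('pos', nlp.tagger)):
            pipe.model.end_training()
            pipe.model.dump(path.join(data_dir, name, 'model'))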
diff --git a/spacy/syntax/parser.pyx b/spacy/syntax/parser.pyx
index 4b25613ad..f746dd715 100644
--- a/spacy/syntax/parser.pyx
+++ b/spacy/syntax/parser.pyx
@@ -18,18 +18,15 @@ import sys
 from cymem.cymem cimport Pool, Address
 from murmurhash.mrmr cimport hash64
 from thinc.typedefs cimport weight_t, class_t, feat_t, atom_t, hash_t
+from thinc.features cimport ConjunctionExtracter
 
 from util import Config
 
-from thinc.api cimport Example, ExampleC
-
-
 from ..structs cimport TokenC
 from ..tokens.doc cimport Doc
 from ..strings cimport StringStore
-
 from .transition_system import OracleError
 from .transition_system cimport TransitionSystem, Transition
@@ -40,7 +37,6 @@ from ._parse_features cimport CONTEXT_SIZE
 from ._parse_features cimport fill_context
 from .stateclass cimport StateClass
 
-from thinc.learner cimport arg_max_if_true
 
 DEBUG = False
@@ -66,8 +62,18 @@ def ParserFactory(transition_system):
     return lambda strings, dir_: Parser(strings, dir_, transition_system)
 
 
+cdef class ParserModel(AveragedPerceptron):
+    def __init__(self, n_classes, templates):
+        AveragedPerceptron.__init__(self, n_classes,
+            ConjunctionExtracter(CONTEXT_SIZE, templates))
+
+    cdef void set_features(self, ExampleC* eg, StateClass stcls) except *:
+        fill_context(eg.atoms, stcls)
+        eg.nr_feat = self.extracter.set_features(eg.features, eg.atoms)
+
+
 cdef class Parser:
-    def __init__(self, StringStore strings, transition_system, model):
+    def __init__(self, StringStore strings, transition_system, ParserModel model):
         self.moves = transition_system
         self.model = model
@@ -80,54 +86,50 @@ cdef class Parser:
         cfg = Config.read(model_dir, 'config')
         moves = transition_system(strings, cfg.labels)
         templates = get_templates(cfg.features)
-        model = Model(moves.n_moves, templates, model_dir)
+        model = ParserModel(moves.n_moves, templates)
+        if path.exists(path.join(model_dir, 'model')):
+            model.load(path.join(model_dir, 'model'))
         return cls(strings, moves, model)
 
+    def __reduce__(self):
+        return (Parser, (self.moves.strings, self.moves, self.model), None, None)
+
     def __call__(self, Doc tokens):
         cdef StateClass stcls = StateClass.init(tokens.c, tokens.length)
         self.moves.initialize_state(stcls)
-        cdef Example eg = Example(self.model.n_classes, CONTEXT_SIZE,
-                                  self.model.n_feats, self.model.n_feats)
-        self.parse(stcls, eg.c)
-        tokens.set_parse(stcls._sent)
-
-    def __reduce__(self):
-        return (Parser, (self.moves.strings, self.moves, self.model), None, None)
-
-    cdef void predict(self, StateClass stcls, ExampleC* eg) nogil:
-        memset(eg.scores, 0, eg.nr_class * sizeof(weight_t))
-        self.moves.set_valid(eg.is_valid, stcls)
-        fill_context(eg.atoms, stcls)
-        self.model.set_scores(eg.scores, eg.atoms)
-        eg.guess = arg_max_if_true(eg.scores, eg.is_valid, self.model.n_classes)
-
-    cdef void parse(self, StateClass stcls, ExampleC eg) nogil:
+        cdef Pool mem = Pool()
+        cdef ExampleC eg = self.model.allocate(mem)
         while not stcls.is_final():
-            self.predict(stcls, &eg)
-            if not eg.is_valid[eg.guess]:
-                break
-            self.moves.c[eg.guess].do(stcls, self.moves.c[eg.guess].label)
-        self.moves.finalize_state(stcls)
+            self.model.set_features(&eg, stcls)
+            self.moves.set_valid(eg.is_valid, stcls)
+            self.model.set_prediction(&eg)
+            assert eg.is_valid[eg.guess]
+
+            action = self.moves.c[eg.guess]
+            action.do(stcls, action.label)
+        self.moves.finalize_state(stcls)
+        tokens.set_parse(stcls._sent)
+
     def train(self, Doc tokens, GoldParse gold):
         self.moves.preprocess_gold(gold)
         cdef StateClass stcls = StateClass.init(tokens.c, tokens.length)
         self.moves.initialize_state(stcls)
-        cdef Example eg = Example(self.model.n_classes, CONTEXT_SIZE,
-                                  self.model.n_feats, self.model.n_feats)
+        cdef Pool mem = Pool()
+        cdef ExampleC eg = self.model.allocate(mem)
         cdef weight_t loss = 0
         words = [w.orth_ for w in tokens]
-        cdef Transition G
+        cdef Transition action
         while not stcls.is_final():
-            memset(eg.c.scores, 0, eg.c.nr_class * sizeof(weight_t))
-            self.moves.set_costs(eg.c.is_valid, eg.c.costs, stcls, gold)
-            fill_context(eg.c.atoms, stcls)
-            self.model.train(eg)
-            G = self.moves.c[eg.c.guess]
+            self.model.set_features(&eg, stcls)
+            self.moves.set_costs(eg.is_valid, eg.costs, stcls, gold)
+            self.model.set_prediction(&eg)
+            self.model.update(&eg)
-            self.moves.c[eg.c.guess].do(stcls, self.moves.c[eg.c.guess].label)
-            loss += eg.c.loss
+            action = self.moves.c[eg.guess]
+            action.do(stcls, action.label)
+            loss += eg.costs[eg.guess]
         return loss
@@ -176,7 +178,10 @@ cdef class StepwiseState:
                 for i in range(self.stcls.length)]
 
     def predict(self):
-        self.parser.predict(self.stcls, &self.eg.c)
+        self.parser.model.set_features(&self.eg.c, self.stcls)
+        self.parser.moves.set_valid(self.eg.c.is_valid, self.stcls)
+        self.parser.model.set_prediction(&self.eg.c)
+
         action = self.parser.moves.c[self.eg.c.guess]
         return self.parser.moves.move_name(action.move, action.label)
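The rewritten __call__ above folds the old predict()/parse() pair into a single loop with a fixed cycle: extract features, mask invalid transitions, predict, apply. A Python-level mirror of that control flow, for orientation only (the real loop operates on C structs; allocate() and set_prediction() are the thinc calls used in the diff):

    def greedy_parse(model, moves, stcls, mem):
        eg = model.allocate(mem)                 # scratch example reused at every step
        while not stcls.is_final():
            model.set_features(eg, stcls)        # fill atoms, extract feature vector
            moves.set_valid(eg.is_valid, stcls)  # mask transitions illegal in this state
            model.set_prediction(eg)             # score classes, argmax over valid moves
            action = moves.c[eg.guess]
            action.do(stcls, action.label)       # mutate the parse state
        moves.finalize_state(stcls)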
diff --git a/spacy/tagger.pxd b/spacy/tagger.pxd
index ad2a90970..30626e775 100644
--- a/spacy/tagger.pxd
+++ b/spacy/tagger.pxd
@@ -1,9 +1,17 @@
-from ._ml cimport Model
+from thinc.api cimport AveragedPerceptron
+from thinc.api cimport ExampleC
+
 from .structs cimport TokenC
 from .vocab cimport Vocab
 
 
+cdef class TaggerModel(AveragedPerceptron):
+    cdef void set_features(self, ExampleC* eg, const TokenC* tokens, int i) except *
+    cdef void set_costs(self, ExampleC* eg, int gold) except *
+    cdef void update(self, ExampleC* eg) except *
+
+
 cdef class Tagger:
     cdef readonly Vocab vocab
-    cdef readonly Model model
+    cdef readonly TaggerModel model
 
     cdef public dict freqs
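The .pxd above fixes the per-token lifecycle the tagger implementation below relies on. Sketched in order for a single token (Python-level pseudocode; the cost semantics, with the gold tag marked zero-cost, are an assumption based on how train() consumes eg.cost below):

    eg = model.allocate(mem)               # fresh ExampleC from the pool
    model.set_features(eg, tokens, i)      # window of two tokens either side of i
    model.set_costs(eg, golds[i])          # training only: mark the gold tag zero-cost
    model.set_prediction(eg)               # score the features, set eg.guess
    model.update(eg)                       # training only: perceptron weight update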
diff --git a/spacy/tagger.pyx b/spacy/tagger.pyx
index 9e5f0784e..58ee906e8 100644
--- a/spacy/tagger.pyx
+++ b/spacy/tagger.pyx
@@ -1,10 +1,12 @@
 import json
 from os import path
 from collections import defaultdict
+from libc.string cimport memset
+from cymem.cymem cimport Pool
 
 from thinc.typedefs cimport atom_t, weight_t
-from thinc.learner cimport arg_max, arg_max_if_true, arg_max_if_zero
-from thinc.api cimport Example
+from thinc.api cimport Example, ExampleC
+from thinc.features cimport ConjunctionExtracter
 
 from .typedefs cimport attr_t
 from .tokens.doc cimport Doc
@@ -64,6 +66,44 @@ cpdef enum:
     N_CONTEXT_FIELDS
 
 
+cdef class TaggerModel(AveragedPerceptron):
+    def __init__(self, n_classes, templates):
+        AveragedPerceptron.__init__(self, n_classes,
+            ConjunctionExtracter(N_CONTEXT_FIELDS, templates))
+
+    cdef void set_features(self, ExampleC* eg, const TokenC* tokens, int i) except *:
+        _fill_from_token(&eg.atoms[P2_orth], &tokens[i-2])
+        _fill_from_token(&eg.atoms[P1_orth], &tokens[i-1])
+        _fill_from_token(&eg.atoms[W_orth], &tokens[i])
+        _fill_from_token(&eg.atoms[N1_orth], &tokens[i+1])
+        _fill_from_token(&eg.atoms[N2_orth], &tokens[i+2])
+
+        eg.nr_feat = self.extracter.set_features(eg.features, eg.atoms)
+
+    cdef void update(self, ExampleC* eg) except *:
+        self.updater.update(eg)
+
+
+cdef inline void _fill_from_token(atom_t* context, const TokenC* t) nogil:
+    context[0] = t.lex.lower
+    context[1] = t.lex.cluster
+    context[2] = t.lex.shape
+    context[3] = t.lex.prefix
+    context[4] = t.lex.suffix
+    context[5] = t.tag
+    context[6] = t.lemma
+    if t.lex.flags & (1 << IS_ALPHA):
+        context[7] = 1
+    elif t.lex.flags & (1 << IS_PUNCT):
+        context[7] = 2
+    elif t.lex.flags & (1 << LIKE_URL):
+        context[7] = 3
+    elif t.lex.flags & (1 << LIKE_NUM):
+        context[7] = 4
+    else:
+        context[7] = 0
+
+
 cdef class Tagger:
     """A part-of-speech tagger for English"""
     @classmethod
@@ -105,7 +145,7 @@ cdef class Tagger:
 
     @classmethod
     def blank(cls, vocab, templates):
-        model = Model(vocab.morphology.n_tags, templates, model_loc=None)
+        model = TaggerModel(vocab.morphology.n_tags, templates)
         return cls(vocab, model)
 
     @classmethod
@@ -114,10 +154,12 @@ cdef class Tagger:
             templates = json.loads(open(path.join(data_dir, 'templates.json')))
         else:
             templates = cls.default_templates()
-        model = Model(vocab.morphology.n_tags, templates, data_dir)
+        model = TaggerModel(vocab.morphology.n_tags, templates)
+        if path.exists(path.join(data_dir, 'model')):
+            model.load(path.join(data_dir, 'model'))
         return cls(vocab, model)
 
-    def __init__(self, Vocab vocab, model):
+    def __init__(self, Vocab vocab, TaggerModel model):
         self.vocab = vocab
         self.model = model
@@ -131,27 +173,6 @@ cdef class Tagger:
     def tag_names(self):
         return self.vocab.morphology.tag_names
 
-    def __call__(self, Doc tokens):
-        """Apply the tagger, setting the POS tags onto the Doc object.
-
-        Args:
-            tokens (Doc): The tokens to be tagged.
-        """
-        if tokens.length == 0:
-            return 0
-
-        cdef Example eg = self.model._eg
-        cdef int i
-        for i in range(tokens.length):
-            if tokens.c[i].pos == 0:
-                eg.wipe()
-                fill_atoms(eg.c.atoms, tokens.c, i)
-                self.model(eg)
-                self.vocab.morphology.assign_tag(&tokens.c[i], eg.c.guess)
-
-        tokens.is_tagged = True
-        tokens._py_tokens = [None] * tokens.length
-
     def __reduce__(self):
         return (self.__class__, (self.vocab, self.model), None, None)
@@ -162,53 +183,45 @@ cdef class Tagger:
         tokens.is_tagged = True
         tokens._py_tokens = [None] * tokens.length
 
+    def __call__(self, Doc tokens):
+        """Apply the tagger, setting the POS tags onto the Doc object.
+
+        Args:
+            tokens (Doc): The tokens to be tagged.
+        """
+        if tokens.length == 0:
+            return 0
+
+        cdef Pool mem = Pool()
+        cdef ExampleC eg
+
+        cdef int i, tag
+        for i in range(tokens.length):
+            if tokens.c[i].pos == 0:
+                eg = self.model.allocate(mem)
+                self.model.set_features(&eg, tokens.c, i)
+                self.model.set_prediction(&eg)
+                self.vocab.morphology.assign_tag(&tokens.c[i], eg.guess)
+        tokens.is_tagged = True
+        tokens._py_tokens = [None] * tokens.length
+
     def train(self, Doc tokens, object gold_tag_strs):
         assert len(tokens) == len(gold_tag_strs)
-        cdef int i
-        cdef int loss
-        cdef const weight_t* scores
-        try:
-            golds = [self.tag_names.index(g) if g is not None else -1 for g in gold_tag_strs]
-        except ValueError:
-            raise ValueError(
-                [g for g in gold_tag_strs if g is not None and g not in self.tag_names])
-        correct = 0
-        cdef Example eg = self.model._eg
+        golds = [self.tag_names.index(g) if g is not None else -1 for g in gold_tag_strs]
+        cdef int correct = 0
+        cdef Pool mem = Pool()
+        cdef ExampleC eg
         for i in range(tokens.length):
-            eg.wipe()
-            fill_atoms(eg.c.atoms, tokens.c, i)
-            self.train(eg)
+            eg = self.model.allocate(mem)
+            self.model.set_features(&eg, tokens.c, i)
+            self.model.set_costs(&eg, golds[i])
+            self.model.set_prediction(&eg)
+            self.model.update(&eg)
 
-            self.vocab.morphology.assign_tag(&tokens.c[i], eg.c.guess)
+            self.vocab.morphology.assign_tag(&tokens.c[i], eg.guess)
 
-            correct += eg.c.cost == 0
+            correct += eg.cost == 0
             self.freqs[TAG][tokens.c[i].tag] += 1
+        tokens.is_tagged = True
+        tokens._py_tokens = [None] * tokens.length
         return correct
-
-
-cdef inline void fill_atoms(atom_t* atoms, const TokenC* tokens, int i) nogil:
-    _fill_from_token(&atoms[P2_orth], &tokens[i-2])
-    _fill_from_token(&atoms[P1_orth], &tokens[i-1])
-    _fill_from_token(&atoms[W_orth], &tokens[i])
-    _fill_from_token(&atoms[N1_orth], &tokens[i+1])
-    _fill_from_token(&atoms[N2_orth], &tokens[i+2])
-
-
-cdef inline void _fill_from_token(atom_t* context, const TokenC* t) nogil:
-    context[0] = t.lex.lower
-    context[1] = t.lex.cluster
-    context[2] = t.lex.shape
-    context[3] = t.lex.prefix
-    context[4] = t.lex.suffix
-    context[5] = t.tag
-    context[6] = t.lemma
-    if t.lex.flags & (1 << IS_ALPHA):
-        context[7] = 1
-    elif t.lex.flags & (1 << IS_PUNCT):
-        context[7] = 2
-    elif t.lex.flags & (1 << LIKE_URL):
-        context[7] = 3
-    elif t.lex.flags & (1 << LIKE_NUM):
-        context[7] = 4
-    else:
-        context[7] = 0
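Putting the tagger pieces together, a hypothetical training driver (train_tagger, docs_and_tags, and the accuracy bookkeeping are illustrative; Tagger.train expects gold tags aligned one-to-one with the Doc's tokens, and the dump path follows language.py above):

    from os import path

    def train_tagger(tagger, docs_and_tags, data_dir):
        n_correct = n_total = 0
        for doc, gold_tags in docs_and_tags:   # gold_tags may contain None entries
            n_correct += tagger.train(doc, gold_tags)
            n_total += len(gold_tags)
        tagger.model.end_training()
        tagger.model.dump(path.join(data_dir, 'pos', 'model'))
        return n_correct / float(n_total)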
diff --git a/spacy/tests/test_basic_create.py b/spacy/tests/test_basic_create.py
index 900a7bc64..322efee4a 100644
--- a/spacy/tests/test_basic_create.py
+++ b/spacy/tests/test_basic_create.py
@@ -11,7 +11,6 @@ from spacy.strings import StringStore
 from spacy.vocab import Vocab
 from spacy.tokenizer import Tokenizer
 from spacy.syntax.arc_eager import ArcEager
-from spacy._ml import Model
 from spacy.tagger import Tagger
 from spacy.syntax.parser import Parser
 from spacy.matcher import Matcher
diff --git a/spacy/tests/test_basic_load.py b/spacy/tests/test_basic_load.py
index 233ddd848..c70bcb84a 100644
--- a/spacy/tests/test_basic_load.py
+++ b/spacy/tests/test_basic_load.py
@@ -12,16 +12,13 @@ from spacy.strings import StringStore
 from spacy.vocab import Vocab
 from spacy.tokenizer import Tokenizer
 from spacy.syntax.arc_eager import ArcEager
-from spacy._ml import Model
 from spacy.tagger import Tagger
-from spacy.syntax.parser import Parser
+from spacy.syntax.parser import Parser, ParserModel
 from spacy.matcher import Matcher
 from spacy.syntax.parser import get_templates
 
 from spacy.en import English
 
-from thinc.learner import LinearModel
-
 
 class TestLoadVocab(unittest.TestCase):
     def test_load(self):
@@ -54,7 +51,6 @@ class TestLoadParser(unittest.TestCase):
         if path.exists(path.join(data_dir, 'deps')):
             parser = Parser.from_dir(path.join(data_dir, 'deps'), vocab.strings, ArcEager)
 
-
     def test_load_careful(self):
         config_data = {"labels": {"0": {"": True}, "1": {"": True}, "2": {"cc": True, "agent": True, "ccomp": True, "prt": True, "meta": True, "nsubjpass": True, "csubj": True, "conj": True, "dobj": True, "neg": True, "csubjpass": True, "mark": True, "auxpass": True, "advcl": True, "aux": True, "ROOT": True, "prep": True, "parataxis": True, "xcomp": True, "nsubj": True, "nummod": True, "advmod": True, "punct": True, "relcl": True, "quantmod": True, "acomp": True, "compound": True, "pcomp": True, "intj": True, "poss": True, "npadvmod": True, "case": True, "attr": True, "dep": True, "appos": True, "det": True, "nmod": True, "amod": True, "dative": True, "pobj": True, "expl": True, "predet": True, "preconj": True, "oprd": True, "acl": True}, "3": {"cc": True, "agent": True, "ccomp": True, "prt": True, "meta": True, "nsubjpass": True, "csubj": True, "conj": True, "acl": True, "poss": True, "neg": True, "mark": True, "auxpass": True, "advcl": True, "aux": True, "amod": True, "ROOT": True, "prep": True, "parataxis": True, "xcomp": True, "nsubj": True, "nummod": True, "advmod": True, "punct": True, "quantmod": True, "acomp": True, "pcomp": True, "intj": True, "relcl": True, "npadvmod": True, "case": True, "attr": True, "dep": True, "appos": True, "det": True, "nmod": True, "dobj": True, "dative": True, "pobj": True, "iobj": True, "expl": True, "predet": True, "preconj": True, "oprd": True}, "4": {"ROOT": True}}, "seed": 0, "features": "basic", "beam_width": 1}
         data_dir = English.default_data_dir()
@@ -63,20 +59,11 @@
         moves = ArcEager(vocab.strings, config_data['labels'])
         templates = get_templates(config_data['features'])
 
-        model = Model(moves.n_moves, templates, path.join(data_dir, 'deps'))
+        model = ParserModel(moves.n_moves, templates)
+        model.load(path.join(data_dir, 'deps', 'model'))
 
         parser = Parser(vocab.strings, moves, model)
 
-    def test_thinc_load(self):
-        data_dir = English.default_data_dir()
-        model_loc = path.join(data_dir, 'deps', 'model')
-
-        # n classes. moves.n_moves above
-        # n features. len(templates) + 1 above
-        if path.exists(model_loc):
-            model = LinearModel(92, 116)
-            model.load(model_loc)
-
 
 if __name__ == '__main__':
     unittest.main()
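For completeness, the updated test implies this load sequence for a standalone parser (a sketch assuming the default data layout; load_parser is a hypothetical wrapper around the calls exercised in test_load_careful):

    from os import path
    from spacy.syntax.arc_eager import ArcEager
    from spacy.syntax.parser import Parser, ParserModel, get_templates

    def load_parser(data_dir, vocab, config_data):
        moves = ArcEager(vocab.strings, config_data['labels'])
        model = ParserModel(moves.n_moves, get_templates(config_data['features']))
        model.load(path.join(data_dir, 'deps', 'model'))  # weights only; config is read separately
        return Parser(vocab.strings, moves, model)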