diff --git a/examples/training/train_ner_standalone.py b/examples/training/train_ner_standalone.py
index 6cca56c69..e4fb1d1e8 100644
--- a/examples/training/train_ner_standalone.py
+++ b/examples/training/train_ner_standalone.py
@@ -20,9 +20,10 @@ import plac
 from pathlib import Path
 import random
 import json
+import tqdm
+
 from thinc.neural.optimizers import Adam
 from thinc.neural.ops import NumpyOps
-import tqdm
 
 from spacy.vocab import Vocab
 from spacy.pipeline import TokenVectorEncoder, NeuralEntityRecognizer
@@ -35,6 +36,7 @@ from spacy.gold import minibatch
 from spacy.scorer import Scorer
 import spacy.util
 
+
 try:
     unicode
 except NameError:
@@ -55,20 +57,17 @@ def init_vocab():
 
 
 class Pipeline(object):
-    def __init__(self, vocab=None, tokenizer=None, tensorizer=None, entity=None):
+    def __init__(self, vocab=None, tokenizer=None, entity=None):
         if vocab is None:
             vocab = init_vocab()
         if tokenizer is None:
             tokenizer = Tokenizer(vocab, {}, None, None, None)
-        if tensorizer is None:
-            tensorizer = TokenVectorEncoder(vocab)
         if entity is None:
             entity = NeuralEntityRecognizer(vocab)
         self.vocab = vocab
         self.tokenizer = tokenizer
-        self.tensorizer = tensorizer
         self.entity = entity
-        self.pipeline = [tensorizer, self.entity]
+        self.pipeline = [self.entity]
 
     def begin_training(self):
         for model in self.pipeline:
@@ -102,10 +101,8 @@ class Pipeline(object):
         golds = [self.make_gold(input_, annot) for input_, annot in
                  zip(inputs, annots)]
 
-        tensors, bp_tensors = self.tensorizer.update(docs, golds, drop=drop)
-        d_tensors = self.entity.update((docs, tensors), golds, drop=drop,
-                                      sgd=sgd, losses=losses)
-        bp_tensors(d_tensors, sgd=sgd)
+        self.entity.update(docs, golds, drop=drop,
+                           sgd=sgd, losses=losses)
         return losses
 
     def evaluate(self, examples):
@@ -123,7 +120,6 @@ class Pipeline(object):
         elif not path.is_dir():
             raise IOError("Can't save pipeline to %s\nNot a directory" % path)
         self.vocab.to_disk(path / 'vocab')
-        self.tensorizer.to_disk(path / 'tensorizer')
         self.entity.to_disk(path / 'ner')
 
     def from_disk(self, path):
@@ -133,7 +129,6 @@ class Pipeline(object):
         if not path.is_dir():
             raise IOError("Cannot load pipeline from %s\nNot a directory" % path)
         self.vocab = self.vocab.from_disk(path / 'vocab')
-        self.tensorizer = self.tensorizer.from_disk(path / 'tensorizer')
         self.entity = self.entity.from_disk(path / 'ner')
 
 
diff --git a/fabfile.py b/fabfile.py
index cfa80ead5..2894fe477 100644
--- a/fabfile.py
+++ b/fabfile.py
@@ -14,6 +14,7 @@ VENV_DIR = path.join(PWD, ENV)
 def env(lang='python2.7'):
     if path.exists(VENV_DIR):
         local('rm -rf {env}'.format(env=VENV_DIR))
+    local('pip install virtualenv')
     local('python -m virtualenv -p {lang} {env}'.format(lang=lang, env=VENV_DIR))
 
 
@@ -32,6 +33,10 @@ def make():
             local('pip install -r requirements.txt')
             local('python setup.py build_ext --inplace')
 
+def sdist():
+    with virtualenv(VENV_DIR):
+        with lcd(path.dirname(__file__)):
+            local('python setup.py sdist')
 
 def clean():
     with lcd(path.dirname(__file__)):
diff --git a/requirements.txt b/requirements.txt
index 7fa5d72d3..0b46b38d5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,7 +3,7 @@ pathlib
 numpy>=1.7
 cymem>=1.30,<1.32
 preshed>=1.0.0,<2.0.0
-thinc>=6.8.2,<6.9.0
+thinc>=6.9.0,<6.10.0
 murmurhash>=0.28,<0.29
 plac<1.0.0,>=0.9.6
 six
diff --git a/setup.py b/setup.py
index 8943d7a2e..23b4f9581 100755
--- a/setup.py
+++ b/setup.py
@@ -195,7 +195,7 @@ def setup_package():
                 'murmurhash>=0.28,<0.29',
                 'cymem>=1.30,<1.32',
                 'preshed>=1.0.0,<2.0.0',
-                'thinc>=6.8.2,<6.9.0',
+                'thinc>=6.9.0,<6.10.0',
                 'plac<1.0.0,>=0.9.6',
                 'six',
                 'pathlib',
diff --git a/spacy/__init__.py b/spacy/__init__.py
index 1cb7c0cbd..ba2479106 100644
--- a/spacy/__init__.py
+++ b/spacy/__init__.py
@@ -4,11 +4,13 @@ from __future__ import unicode_literals
 from .cli.info import info as cli_info
 from .glossary import explain
 from .deprecated import resolve_load_name
+#from .about import __version__
 from .about import __version__
 from . import util
 
 
 def load(name, **overrides):
+    from .deprecated import resolve_load_name
     name = resolve_load_name(name, **overrides)
     return util.load_model(name, **overrides)
 
diff --git a/spacy/_ml.py b/spacy/_ml.py
index 3b6e4da10..6df10b6b2 100644
--- a/spacy/_ml.py
+++ b/spacy/_ml.py
@@ -1,29 +1,27 @@
 import ujson
+from thinc.v2v import Model, Maxout, Softmax, Affine, ReLu, SELU
+from thinc.i2v import HashEmbed, StaticVectors
+from thinc.t2t import ExtractWindow, ParametricAttention
+from thinc.t2v import Pooling, max_pool, mean_pool, sum_pool
+from thinc.misc import Residual
+from thinc.misc import BatchNorm as BN
+from thinc.misc import LayerNorm as LN
+
 from thinc.api import add, layerize, chain, clone, concatenate, with_flatten
-from thinc.neural import Model, Maxout, Softmax, Affine
-from thinc.neural._classes.hash_embed import HashEmbed
+from thinc.api import FeatureExtracter, with_getitem
+from thinc.api import uniqued, wrap, flatten_add_lengths, noop
+
+from thinc.linear.linear import LinearModel
 from thinc.neural.ops import NumpyOps, CupyOps
 from thinc.neural.util import get_array_module
-import thinc.extra.load_nlp
+
 import random
 import cytoolz
 
-from thinc.neural._classes.convolution import ExtractWindow
-from thinc.neural._classes.static_vectors import StaticVectors
-from thinc.neural._classes.batchnorm import BatchNorm as BN
-from thinc.neural._classes.layernorm import LayerNorm as LN
-from thinc.neural._classes.resnet import Residual
-from thinc.neural import ReLu
-from thinc.neural._classes.selu import SELU
 from thinc import describe
 from thinc.describe import Dimension, Synapses, Biases, Gradient
 from thinc.neural._classes.affine import _set_dimensions_if_needed
-from thinc.api import FeatureExtracter, with_getitem
-from thinc.neural.pooling import Pooling, max_pool, mean_pool, sum_pool
-from thinc.neural._classes.attention import ParametricAttention
-from thinc.linear.linear import LinearModel
-from thinc.api import uniqued, wrap, flatten_add_lengths, noop
-
+import thinc.extra.load_nlp
 
 from .attrs import ID, ORTH, LOWER, NORM, PREFIX, SUFFIX, SHAPE, TAG, DEP, CLUSTER
 from .tokens.doc import Doc
@@ -32,6 +30,10 @@ from . import util
 import numpy
 import io
 
+# TODO: Unset this once we no longer want to support previous models.
+import thinc.neural._classes.layernorm
+thinc.neural._classes.layernorm.set_compat_six_eight(True)
+
 VECTORS_KEY = 'spacy_pretrained_vectors'
 
 @layerize
diff --git a/spacy/cli/evaluate.py b/spacy/cli/evaluate.py
index 209660529..d9be95fae 100644
--- a/spacy/cli/evaluate.py
+++ b/spacy/cli/evaluate.py
@@ -32,18 +32,25 @@ numpy.random.seed(0)
     model=("Model name or path", "positional", None, str),
     data_path=("Location of JSON-formatted evaluation data", "positional", None, str),
     gold_preproc=("Use gold preprocessing", "flag", "G", bool),
+    gpu_id=("Use GPU", "option", "g", int),
 )
-def evaluate(cmd, model, data_path, gold_preproc=False):
+def evaluate(cmd, model, data_path, gpu_id=-1, gold_preproc=False):
     """
     Train a model. Expects data in spaCy's JSON format.
     """
-    util.set_env_log(True)
+    util.use_gpu(gpu_id)
+    util.set_env_log(False)
     data_path = util.ensure_path(data_path)
     if not data_path.exists():
         prints(data_path, title="Evaluation data not found", exits=1)
     corpus = GoldCorpus(data_path, data_path)
     nlp = util.load_model(model)
-    scorer = nlp.evaluate(list(corpus.dev_docs(nlp, gold_preproc=gold_preproc)))
+    dev_docs = list(corpus.dev_docs(nlp, gold_preproc=gold_preproc))
+    begin = timer()
+    scorer = nlp.evaluate(dev_docs, verbose=False)
+    end = timer()
+    nwords = sum(len(doc_gold[0]) for doc_gold in dev_docs)
+    print('Time', end-begin, 'words', nwords, 'w.p.s', nwords/(end-begin))
     print_results(scorer)
 
 
diff --git a/spacy/language.py b/spacy/language.py
index 701b5c140..c49c64b1d 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -388,7 +388,7 @@ class Language(object):
         self._optimizer.device = device
         return self._optimizer
 
-    def evaluate(self, docs_golds):
+    def evaluate(self, docs_golds, verbose=False):
         scorer = Scorer()
         docs, golds = zip(*docs_golds)
         docs = list(docs)
@@ -401,7 +401,9 @@ class Language(object):
                 docs = list(pipe.pipe(docs))
         assert len(docs) == len(golds)
         for doc, gold in zip(docs, golds):
-            scorer.score(doc, gold)
+            if verbose:
+                print(doc)
+            scorer.score(doc, gold, verbose=verbose)
         return scorer
 
     @contextmanager
diff --git a/spacy/pipeline.pyx b/spacy/pipeline.pyx
index 1a12107b7..8d935335c 100644
--- a/spacy/pipeline.pyx
+++ b/spacy/pipeline.pyx
@@ -4,7 +4,6 @@
 from __future__ import unicode_literals
 
 from thinc.api import chain, layerize, with_getitem
-from thinc.neural import Model, Softmax
 import numpy
 cimport numpy as np
 import cytoolz
@@ -14,17 +13,18 @@ import ujson
 import msgpack
 
 from thinc.api import add, layerize, chain, clone, concatenate, with_flatten
-from thinc.neural import Model, Maxout, Softmax, Affine
-from thinc.neural._classes.hash_embed import HashEmbed
+from thinc.v2v import Model, Maxout, Softmax, Affine, ReLu, SELU
+from thinc.i2v import HashEmbed
+from thinc.t2v import Pooling, max_pool, mean_pool, sum_pool
+from thinc.t2t import ExtractWindow, ParametricAttention
+from thinc.misc import Residual
+from thinc.misc import BatchNorm as BN
+from thinc.misc import LayerNorm as LN
+
 from thinc.neural.util import to_categorical
 
-from thinc.neural.pooling import Pooling, max_pool, mean_pool
 from thinc.neural._classes.difference import Siamese, CauchySimilarity
 
-from thinc.neural._classes.convolution import ExtractWindow
-from thinc.neural._classes.resnet import Residual
-from thinc.neural._classes.batchnorm import BatchNorm as BN
-
 from .tokens.doc cimport Doc
 from .syntax.parser cimport Parser as LinearParser
 from .syntax.nn_parser cimport Parser as NeuralParser
diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx
index 87099aa4f..016807e87 100644
--- a/spacy/syntax/nn_parser.pyx
+++ b/spacy/syntax/nn_parser.pyx
@@ -38,10 +38,9 @@ from preshed.maps cimport MapStruct
 from preshed.maps cimport map_get
 
 from thinc.api import layerize, chain, noop, clone, with_flatten
-from thinc.neural import Model, Affine, ReLu, Maxout
-from thinc.neural._classes.batchnorm import BatchNorm as BN
-from thinc.neural._classes.selu import SELU
-from thinc.neural._classes.layernorm import LayerNorm
+from thinc.v2v import Model, Maxout, Softmax, Affine, ReLu, SELU
+from thinc.misc import LayerNorm
+
 from thinc.neural.ops import NumpyOps, CupyOps
 from thinc.neural.util import get_array_module
 
diff --git a/spacy/tests/test_misc.py b/spacy/tests/test_misc.py
index 80b859c70..762ea4c08 100644
--- a/spacy/tests/test_misc.py
+++ b/spacy/tests/test_misc.py
@@ -9,7 +9,8 @@ from .util import get_doc
 
 from pathlib import Path
 import pytest
-from thinc.neural import Maxout, Softmax
+from thinc.neural._classes.maxout import Maxout
+from thinc.neural._classes.softmax import Softmax
 from thinc.api import chain
 
 
diff --git a/spacy/util.py b/spacy/util.py
index 911970831..e1a721a12 100644
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -563,7 +563,10 @@ def minify_html(html):
 
 
 def use_gpu(gpu_id):
-    import cupy.cuda.device
+    try:
+        import cupy.cuda.device
+    except ImportError:
+        return None
     from thinc.neural.ops import CupyOps
     device = cupy.cuda.device.Device(gpu_id)
     device.use()