diff --git a/requirements.txt b/requirements.txt
index 37259e747..aae0f9388 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,7 +3,7 @@ pathlib
 numpy>=1.7
 cymem>=1.30,<1.32
 preshed>=1.0.0,<2.0.0
-thinc>=6.7.3,<6.8.0
+thinc>=6.8.0,<6.9.0
 murmurhash>=0.28,<0.29
 plac<1.0.0,>=0.9.6
 six
diff --git a/setup.py b/setup.py
index df6afdb2c..ecdf15536 100755
--- a/setup.py
+++ b/setup.py
@@ -192,7 +192,7 @@ def setup_package():
                 'murmurhash>=0.28,<0.29',
                 'cymem>=1.30,<1.32',
                 'preshed>=1.0.0,<2.0.0',
-                'thinc>=6.7.3,<6.8.0',
+                'thinc>=6.8.0,<6.9.0',
                 'plac<1.0.0,>=0.9.6',
                 'pip>=9.0.0,<10.0.0',
                 'six',
diff --git a/spacy/__init__.py b/spacy/__init__.py
index 068282b1a..1cb7c0cbd 100644
--- a/spacy/__init__.py
+++ b/spacy/__init__.py
@@ -13,5 +13,10 @@ def load(name, **overrides):
     return util.load_model(name, **overrides)
 
 
+def blank(name, **kwargs):  # Build an object for `name` directly from its class; util.load_model is not called here.
+    LangClass = util.get_lang_class(name)  # class that util.get_lang_class returns for `name`
+    return LangClass(**kwargs)  # all keyword arguments are forwarded unchanged to the constructor
+
+
 def info(model=None, markdown=False):
     return cli_info(None, model, markdown)
diff --git a/spacy/_ml.py b/spacy/_ml.py
index 2d0910a53..f1ded666e 100644
--- a/spacy/_ml.py
+++ b/spacy/_ml.py
@@ -4,18 +4,22 @@ from thinc.neural import Model, Maxout, Softmax, Affine
 from thinc.neural._classes.hash_embed import HashEmbed
 from thinc.neural.ops import NumpyOps, CupyOps
 from thinc.neural.util import get_array_module
+import random
 
 from thinc.neural._classes.convolution import ExtractWindow
 from thinc.neural._classes.static_vectors import StaticVectors
 from thinc.neural._classes.batchnorm import BatchNorm
 from thinc.neural._classes.resnet import Residual
 from thinc.neural import ReLu
+from thinc.neural._classes.selu import SELU
 from thinc import describe
 from thinc.describe import Dimension, Synapses, Biases, Gradient
 from thinc.neural._classes.affine import _set_dimensions_if_needed
 from thinc.api import FeatureExtracter, with_getitem
-from thinc.neural.pooling import Pooling, max_pool, mean_pool
+from thinc.neural.pooling import Pooling, max_pool, mean_pool, sum_pool
+from thinc.neural._classes.attention import ParametricAttention
 from thinc.linear.linear import LinearModel
+from thinc.api import uniqued, wrap
 
 from .attrs import ID, ORTH, LOWER, NORM, PREFIX, SUFFIX, SHAPE, TAG, DEP
 from .tokens.doc import Doc
@@ -367,7 +371,7 @@ def preprocess_doc(docs, drop=0.):
 
 
 def build_text_classifier(nr_class, width=64, **cfg):
-    nr_vector = cfg.get('nr_vector', 1000)
+    nr_vector = cfg.get('nr_vector', 200)
     with Model.define_operators({'>>': chain, '+': add, '|': concatenate, '**': clone}):
         embed_lower = HashEmbed(width, nr_vector, column=1)
         embed_prefix = HashEmbed(width//2, nr_vector, column=2)
@@ -378,25 +382,26 @@ def build_text_classifier(nr_class, width=64, **cfg):
             FeatureExtracter([ORTH, LOWER, PREFIX, SUFFIX, SHAPE])
             >> _flatten_add_lengths
             >> with_getitem(0,
-                (embed_lower | embed_prefix | embed_suffix | embed_shape) 
-                >> Maxout(width, width+(width//2)*3)
+                uniqued(
+                  (embed_lower | embed_prefix | embed_suffix | embed_shape) 
+                  >> Maxout(width, width+(width//2)*3))
                 >> Residual(ExtractWindow(nW=1) >> ReLu(width, width*3))
                 >> Residual(ExtractWindow(nW=1) >> ReLu(width, width*3))
                 >> Residual(ExtractWindow(nW=1) >> ReLu(width, width*3))
             )
-            >> Pooling(mean_pool, max_pool)
-            >> Residual(ReLu(width*2, width*2))
+            >> ParametricAttention(width,)
+            >> Pooling(sum_pool)
+            >> ReLu(width, width)
+            >> zero_init(Affine(nr_class, width, drop_factor=0.0))
         )
         linear_model = (
             _preprocess_doc
-            >> LinearModel(nr_class)
-            >> logistic
+            >> LinearModel(nr_class, drop_factor=0.)
         )
 
         model = (
-            #(linear_model | cnn_model)
-            cnn_model
-            >> zero_init(Affine(nr_class, width*2+nr_class, drop_factor=0.0))
+            (linear_model | cnn_model)
+            >> zero_init(Affine(nr_class, nr_class*2, drop_factor=0.0))
             >> logistic
         )
  
diff --git a/spacy/about.py b/spacy/about.py
index 7cee56422..4bac17503 100644
--- a/spacy/about.py
+++ b/spacy/about.py
@@ -3,7 +3,7 @@
 # https://github.com/pypa/warehouse/blob/master/warehouse/__about__.py
 
 __title__ = 'spacy-nightly'
-__version__ = '2.0.0a4'
+__version__ = '2.0.0a6'
 __summary__ = 'Industrial-strength Natural Language Processing (NLP) with Python and Cython'
 __uri__ = 'https://spacy.io'
 __author__ = 'Explosion AI'
diff --git a/spacy/compat.py b/spacy/compat.py
index 848ea816a..c2ab27d7e 100644
--- a/spacy/compat.py
+++ b/spacy/compat.py
@@ -5,6 +5,7 @@ import six
 import ftfy
 import sys
 import ujson
+import itertools
 
 from thinc.neural.util import copy_array
 
@@ -35,6 +36,7 @@ CudaStream = CudaStream
 cupy = cupy
 fix_text = ftfy.fix_text
 copy_array = copy_array
+izip = getattr(itertools, 'izip', zip)  # use itertools.izip when the attribute exists (Python 2), else builtin zip
 
 is_python2 = six.PY2
 is_python3 = six.PY3
diff --git a/spacy/language.py b/spacy/language.py
index de25157fb..fad2e2119 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -10,6 +10,7 @@ from thinc.neural.optimizers import Adam, SGD
 import random
 import ujson
 from collections import OrderedDict
+import itertools
 
 from .tokenizer import Tokenizer
 from .vocab import Vocab
@@ -25,7 +26,7 @@ from .pipeline import SimilarityHook
 from .pipeline import TextCategorizer
 from . import about
 
-from .compat import json_dumps
+from .compat import json_dumps, izip
 from .attrs import IS_STOP
 from .lang.punctuation import TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES, TOKENIZER_INFIXES
 from .lang.tokenizer_exceptions import TOKEN_MATCH
@@ -411,7 +412,7 @@ class Language(object):
             except StopIteration:
                 pass
 
-    def pipe(self, texts, n_threads=2, batch_size=1000, disable=[]):
+    def pipe(self, texts, tuples=False, n_threads=2, batch_size=1000, disable=[]):
         """Process texts as a stream, and yield `Doc` objects in order. Supports
         GIL-free multi-threading.
 
@@ -427,8 +428,16 @@ class Language(object):
             >>>     for doc in nlp.pipe(texts, batch_size=50, n_threads=4):
             >>>         assert doc.is_parsed
         """
+        if tuples:
+            text_context1, text_context2 = itertools.tee(texts)
+            texts = (tc[0] for tc in text_context1)
+            contexts = (tc[1] for tc in text_context2)
+            docs = self.pipe(texts, n_threads=n_threads, batch_size=batch_size,
+                             disable=disable)
+            for doc, context in izip(docs, contexts):
+                yield (doc, context)
+            return
         docs = (self.make_doc(text) for text in texts)
-        docs = texts
         for proc in self.pipeline:
             name = getattr(proc, 'name', None)
             if name in disable:
diff --git a/spacy/pipeline.pyx b/spacy/pipeline.pyx
index d48cae26d..947f0a1f1 100644
--- a/spacy/pipeline.pyx
+++ b/spacy/pipeline.pyx
@@ -417,7 +417,7 @@ class NeuralTagger(BaseThincComponent):
             ('vocab', lambda p: self.vocab.from_disk(p)),
             ('tag_map', load_tag_map),
             ('model', load_model),
-            ('cfg', lambda p: self.cfg.update(ujson.load(p.open()))),
+            ('cfg', lambda p: self.cfg.update(_load_cfg(p)))
         ))
         util.from_disk(path, deserialize, exclude)
         return self