Wrap try/except around model saving

Matthew Honnibal 2017-10-05 08:14:24 -05:00
commit c6cd81f192
331 changed files with 10443 additions and 10377 deletions
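The title describes guarding the model-saving step with try/except, so a failure while writing to disk is reported instead of aborting the run. A minimal sketch of that pattern, assuming an nlp pipeline object with a to_disk() method and a directory path (the names are illustrative, not the exact code changed in this commit):

from pathlib import Path

def save_model(nlp, output_dir):
    # Guarded save: report a failed write rather than crashing the surrounding loop.
    output_dir = Path(output_dir)
    if not output_dir.exists():
        output_dir.mkdir()
    try:
        nlp.to_disk(output_dir)
    except Exception as err:
        print("Saving model to %s failed: %s" % (output_dir, err))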

View File

@ -1 +1,55 @@
environment:
matrix:
# For Python versions available on Appveyor, see
# http://www.appveyor.com/docs/installed-software#python
# The list here is complete (excluding Python 2.6, which
# isn't covered by this document) at the time of writing.
- PYTHON: "C:\\Python27"
#- PYTHON: "C:\\Python33"
#- PYTHON: "C:\\Python34"
#- PYTHON: "C:\\Python35"
#- PYTHON: "C:\\Python27-x64"
#- PYTHON: "C:\\Python33-x64"
#- DISTUTILS_USE_SDK: "1"
#- PYTHON: "C:\\Python34-x64"
#- DISTUTILS_USE_SDK: "1"
#- PYTHON: "C:\\Python35-x64"
- PYTHON: "C:\\Python36-x64"
install:
# We need wheel installed to build wheels
- "%PYTHON%\\python.exe -m pip install wheel"
- "%PYTHON%\\python.exe -m pip install cython"
- "%PYTHON%\\python.exe -m pip install -r requirements.txt"
- "%PYTHON%\\python.exe -m pip install -e ."
build: off
test_script:
# Put your test command here.
# If you don't need to build C extensions on 64-bit Python 3.3 or 3.4,
# you can remove "build.cmd" from the front of the command, as it's
# only needed to support those cases.
# Note that you must use the environment variable %PYTHON% to refer to
# the interpreter you're using - Appveyor does not do anything special
# to put the Python version you want to use on PATH.
- "%PYTHON%\\python.exe -m pytest spacy/"
after_test:
# This step builds your wheels.
# Again, you only need build.cmd if you're building C extensions for
# 64-bit Python 3.3/3.4. And you need to use %PYTHON% to get the correct
# interpreter
- "%PYTHON%\\python.exe setup.py bdist_wheel"
artifacts:
# bdist_wheel puts your built wheel in the dist directory
- path: dist\*
#on_success:
# You can use this step to upload your artifacts to a public website.
# See Appveyor's documentation for more details. Or you can simply
# access your wheels from the Appveyor "artifacts" tab for your build.

11
.buildkite/sdist.yml Normal file
View File

@ -0,0 +1,11 @@
steps:
-
command: "fab env clean make test sdist"
label: ":dizzy: :python:"
artifact_paths: "dist/*.tar.gz"
- wait
- trigger: "spacy-sdist-against-models"
label: ":dizzy: :hammer:"
build:
env:
SPACY_VERSION: "{$SPACY_VERSION}"

4
.gitignore vendored
View File

@ -1,14 +1,12 @@
# spaCy
spacy/data/
corpora/
models/
/models/
keys/
# Website
website/www/
website/_deploy.sh
website/package.json
website/announcement.jade
website/.gitignore
# Cython / C extensions

View File

@ -1,322 +0,0 @@
'''WIP --- Doesn't work well yet'''
import plac
import random
import six
import cProfile
import pstats
import pathlib
import cPickle as pickle
from itertools import izip
import spacy
import cytoolz
import cupy as xp
import cupy.cuda
import chainer.cuda
import chainer.links as L
import chainer.functions as F
from chainer import Chain, Variable, report
import chainer.training
import chainer.optimizers
from chainer.training import extensions
from chainer.iterators import SerialIterator
from chainer.datasets import TupleDataset
class SentimentAnalyser(object):
@classmethod
def load(cls, path, nlp, max_length=100):
raise NotImplementedError
#with (path / 'config.json').open() as file_:
# model = model_from_json(file_.read())
#with (path / 'model').open('rb') as file_:
# lstm_weights = pickle.load(file_)
#embeddings = get_embeddings(nlp.vocab)
#model.set_weights([embeddings] + lstm_weights)
#return cls(model, max_length=max_length)
def __init__(self, model, max_length=100):
self._model = model
self.max_length = max_length
def __call__(self, doc):
X = get_features([doc], self.max_length)
y = self._model.predict(X)
self.set_sentiment(doc, y)
def pipe(self, docs, batch_size=1000, n_threads=2):
for minibatch in cytoolz.partition_all(batch_size, docs):
minibatch = list(minibatch)
sentences = []
for doc in minibatch:
sentences.extend(doc.sents)
Xs = get_features(sentences, self.max_length)
ys = self._model.predict(Xs)
for sent, label in zip(sentences, ys):
sent.doc.sentiment += label - 0.5
for doc in minibatch:
yield doc
def set_sentiment(self, doc, y):
doc.sentiment = float(y[0])
# Sentiment has a native slot for a single float.
# For arbitrary data storage, there's:
# doc.user_data['my_data'] = y
class Classifier(Chain):
def __init__(self, predictor):
super(Classifier, self).__init__(predictor=predictor)
def __call__(self, x, t):
y = self.predictor(x)
loss = F.softmax_cross_entropy(y, t)
accuracy = F.accuracy(y, t)
report({'loss': loss, 'accuracy': accuracy}, self)
return loss
class SentimentModel(Chain):
def __init__(self, nlp, shape, **settings):
Chain.__init__(self,
embed=_Embed(shape['nr_vector'], shape['nr_dim'], shape['nr_hidden'],
set_vectors=lambda arr: set_vectors(arr, nlp.vocab)),
encode=_Encode(shape['nr_hidden'], shape['nr_hidden']),
attend=_Attend(shape['nr_hidden'], shape['nr_hidden']),
predict=_Predict(shape['nr_hidden'], shape['nr_class']))
self.to_gpu(0)
def __call__(self, sentence):
return self.predict(
self.attend(
self.encode(
self.embed(sentence))))
class _Embed(Chain):
def __init__(self, nr_vector, nr_dim, nr_out, set_vectors=None):
Chain.__init__(self,
embed=L.EmbedID(nr_vector, nr_dim, initialW=set_vectors),
project=L.Linear(None, nr_out, nobias=True))
self.embed.W.volatile = False
def __call__(self, sentence):
return [self.project(self.embed(ts)) for ts in F.transpose(sentence)]
class _Encode(Chain):
def __init__(self, nr_in, nr_out):
Chain.__init__(self,
fwd=L.LSTM(nr_in, nr_out),
bwd=L.LSTM(nr_in, nr_out),
mix=L.Bilinear(nr_out, nr_out, nr_out))
def __call__(self, sentence):
self.fwd.reset_state()
fwds = map(self.fwd, sentence)
self.bwd.reset_state()
bwds = reversed(map(self.bwd, reversed(sentence)))
return [F.elu(self.mix(f, b)) for f, b in zip(fwds, bwds)]
class _Attend(Chain):
def __init__(self, nr_in, nr_out):
Chain.__init__(self)
def __call__(self, sentence):
sent = sum(sentence)
return sent
class _Predict(Chain):
def __init__(self, nr_in, nr_out):
Chain.__init__(self,
l1=L.Linear(nr_in, nr_in),
l2=L.Linear(nr_in, nr_out))
def __call__(self, vector):
vector = self.l1(vector)
vector = F.elu(vector)
vector = self.l2(vector)
return vector
class SentenceDataset(TupleDataset):
def __init__(self, nlp, texts, labels, max_length):
self.max_length = max_length
sents, labels = self._get_labelled_sentences(
nlp.pipe(texts, batch_size=5000, n_threads=3),
labels)
TupleDataset.__init__(self,
get_features(sents, max_length),
labels)
def __getitem__(self, index):
batches = [dataset[index] for dataset in self._datasets]
if isinstance(index, slice):
length = len(batches[0])
returns = [tuple([batch[i] for batch in batches])
for i in six.moves.range(length)]
return returns
else:
return tuple(batches)
def _get_labelled_sentences(self, docs, doc_labels):
labels = []
sentences = []
for doc, y in izip(docs, doc_labels):
for sent in doc.sents:
sentences.append(sent)
labels.append(y)
return sentences, xp.asarray(labels, dtype='i')
class DocDataset(TupleDataset):
def __init__(self, nlp, texts, labels):
self.max_length = max_length
DatasetMixin.__init__(self,
get_features(
nlp.pipe(texts, batch_size=5000, n_threads=3), self.max_length),
labels)
def read_data(data_dir, limit=0):
examples = []
for subdir, label in (('pos', 1), ('neg', 0)):
for filename in (data_dir / subdir).iterdir():
with filename.open() as file_:
text = file_.read()
examples.append((text, label))
random.shuffle(examples)
if limit >= 1:
examples = examples[:limit]
return zip(*examples) # Unzips into two lists
def get_features(docs, max_length):
docs = list(docs)
Xs = xp.zeros((len(docs), max_length), dtype='i')
for i, doc in enumerate(docs):
j = 0
for token in doc:
if token.has_vector and not token.is_punct and not token.is_space:
Xs[i, j] = token.norm
j += 1
if j >= max_length:
break
return Xs
def set_vectors(vectors, vocab):
for lex in vocab:
if lex.has_vector and (lex.rank+1) < vectors.shape[0]:
lex.norm = lex.rank+1
vectors[lex.rank + 1] = lex.vector
else:
lex.norm = 0
return vectors
def train(train_texts, train_labels, dev_texts, dev_labels,
lstm_shape, lstm_settings, lstm_optimizer, batch_size=100, nb_epoch=5,
by_sentence=True):
nlp = spacy.load('en', entity=False)
if 'nr_vector' not in lstm_shape:
lstm_shape['nr_vector'] = max(lex.rank+1 for lex in nlp.vocab if lex.has_vector)
if 'nr_dim' not in lstm_shape:
lstm_shape['nr_dim'] = nlp.vocab.vectors_length
print("Make model")
model = Classifier(SentimentModel(nlp, lstm_shape, **lstm_settings))
print("Parsing texts...")
if by_sentence:
train_data = SentenceDataset(nlp, train_texts, train_labels, lstm_shape['max_length'])
dev_data = SentenceDataset(nlp, dev_texts, dev_labels, lstm_shape['max_length'])
else:
train_data = DocDataset(nlp, train_texts, train_labels)
dev_data = DocDataset(nlp, dev_texts, dev_labels)
train_iter = SerialIterator(train_data, batch_size=batch_size,
shuffle=True, repeat=True)
dev_iter = SerialIterator(dev_data, batch_size=batch_size,
shuffle=False, repeat=False)
optimizer = chainer.optimizers.Adam()
optimizer.setup(model)
updater = chainer.training.StandardUpdater(train_iter, optimizer, device=0)
trainer = chainer.training.Trainer(updater, (1, 'epoch'), out='result')
trainer.extend(extensions.Evaluator(dev_iter, model, device=0))
trainer.extend(extensions.LogReport())
trainer.extend(extensions.PrintReport([
'epoch', 'main/accuracy', 'validation/main/accuracy']))
trainer.extend(extensions.ProgressBar())
trainer.run()
def evaluate(model_dir, texts, labels, max_length=100):
def create_pipeline(nlp):
'''
This could be a lambda, but named functions are easier to read in Python.
'''
return [nlp.tagger, nlp.parser, SentimentAnalyser.load(model_dir, nlp,
max_length=max_length)]
nlp = spacy.load('en')
nlp.pipeline = create_pipeline(nlp)
correct = 0
i = 0
for doc in nlp.pipe(texts, batch_size=1000, n_threads=4):
correct += bool(doc.sentiment >= 0.5) == bool(labels[i])
i += 1
return float(correct) / i
@plac.annotations(
train_dir=("Location of training file or directory"),
dev_dir=("Location of development file or directory"),
model_dir=("Location of output model directory",),
is_runtime=("Demonstrate run-time usage", "flag", "r", bool),
nr_hidden=("Number of hidden units", "option", "H", int),
max_length=("Maximum sentence length", "option", "L", int),
dropout=("Dropout", "option", "d", float),
learn_rate=("Learn rate", "option", "e", float),
nb_epoch=("Number of training epochs", "option", "i", int),
batch_size=("Size of minibatches for training LSTM", "option", "b", int),
nr_examples=("Limit to N examples", "option", "n", int)
)
def main(model_dir, train_dir, dev_dir,
is_runtime=False,
nr_hidden=64, max_length=100, # Shape
dropout=0.5, learn_rate=0.001, # General NN config
nb_epoch=5, batch_size=32, nr_examples=-1): # Training params
model_dir = pathlib.Path(model_dir)
train_dir = pathlib.Path(train_dir)
dev_dir = pathlib.Path(dev_dir)
if is_runtime:
dev_texts, dev_labels = read_data(dev_dir)
acc = evaluate(model_dir, dev_texts, dev_labels, max_length=max_length)
print(acc)
else:
print("Read data")
train_texts, train_labels = read_data(train_dir, limit=nr_examples)
dev_texts, dev_labels = read_data(dev_dir, limit=nr_examples)
print("Using GPU 0")
#chainer.cuda.get_device(0).use()
train_labels = xp.asarray(train_labels, dtype='i')
dev_labels = xp.asarray(dev_labels, dtype='i')
lstm = train(train_texts, train_labels, dev_texts, dev_labels,
{'nr_hidden': nr_hidden, 'max_length': max_length, 'nr_class': 2,
'nr_vector': 5000},
{'dropout': 0.5, 'lr': learn_rate},
{},
nb_epoch=nb_epoch, batch_size=batch_size)
if __name__ == '__main__':
#cProfile.runctx("plac.call(main)", globals(), locals(), "Profile.prof")
#s = pstats.Stats("Profile.prof")
#s.strip_dirs().sort_stats("time").print_stats()
plac.call(main)

View File

@ -20,71 +20,71 @@ The algorithm is O(n) at run-time for document of length n because we're only ev
matching over the tag patterns. So no matter how many phrases we're looking for,
our pattern set stays very small (exact size depends on the maximum length we're
looking for, as the query language currently has no quantifiers)
The example expects a .bz2 file from the Reddit corpus, and a patterns file,
formatted in jsonl as a sequence of entries like this:
{"text":"Anchorage"}
{"text":"Angola"}
{"text":"Ann Arbor"}
{"text":"Annapolis"}
{"text":"Appalachia"}
{"text":"Argentina"}
"""
from __future__ import print_function, unicode_literals, division
from ast import literal_eval
from bz2 import BZ2File
import time
import math
import codecs
import plac
import ujson
from preshed.maps import PreshMap
from preshed.counter import PreshCounter
from spacy.strings import hash_string
from spacy.en import English
from spacy.matcher import PhraseMatcher
import spacy
def read_gazetteer(tokenizer, loc, n=-1):
for i, line in enumerate(open(loc)):
phrase = literal_eval('u' + line.strip())
if ' (' in phrase and phrase.endswith(')'):
phrase = phrase.split(' (', 1)[0]
if i >= n:
break
phrase = tokenizer(phrase)
if all((t.is_lower and t.prob >= -10) for t in phrase):
continue
data = ujson.loads(line.strip())
phrase = tokenizer(data['text'])
for w in phrase:
_ = tokenizer.vocab[w.text]
if len(phrase) >= 2:
yield phrase
def read_text(bz2_loc):
def read_text(bz2_loc, n=10000):
with BZ2File(bz2_loc) as file_:
for line in file_:
yield line.decode('utf8')
for i, line in enumerate(file_):
data = ujson.loads(line)
yield data['body']
if i >= n:
break
def get_matches(tokenizer, phrases, texts, max_length=6):
matcher = PhraseMatcher(tokenizer.vocab, phrases, max_length=max_length)
print("Match")
matcher = PhraseMatcher(tokenizer.vocab, max_length=max_length)
matcher.add('Phrase', None, *phrases)
for text in texts:
doc = tokenizer(text)
for w in doc:
_ = doc.vocab[w.text]
matches = matcher(doc)
for mwe in doc.ents:
yield mwe
for ent_id, start, end in matches:
yield (ent_id, doc[start:end].text)
def main(patterns_loc, text_loc, counts_loc, n=10000000):
nlp = English(parser=False, tagger=False, entity=False)
print("Make matcher")
phrases = read_gazetteer(nlp.tokenizer, patterns_loc, n=n)
counts = PreshCounter()
def main(patterns_loc, text_loc, n=10000):
nlp = spacy.blank('en')
nlp.vocab.lex_attr_getters = {}
phrases = read_gazetteer(nlp.tokenizer, patterns_loc)
count = 0
t1 = time.time()
for mwe in get_matches(nlp.tokenizer, phrases, read_text(text_loc)):
counts.inc(hash_string(mwe.text), 1)
for ent_id, text in get_matches(nlp.tokenizer, phrases, read_text(text_loc, n=n)):
count += 1
t2 = time.time()
print("10m tokens in %d s" % (t2 - t1))
with codecs.open(counts_loc, 'w', 'utf8') as file_:
for phrase in read_gazetteer(nlp.tokenizer, patterns_loc, n=n):
text = phrase.string
key = hash_string(text)
count = counts[key]
if count != 0:
file_.write('%d\t%s\n' % (count, text))
print("%d docs in %.3f s. %d matches" % (n, (t2 - t1), count))
if __name__ == '__main__':
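The hunk above migrates from passing phrases into the PhraseMatcher constructor to registering them with matcher.add() and reading (match_id, start, end) tuples back from the matcher. A self-contained sketch of that usage under the spaCy 2 alpha API shown in the diff, with a few hard-coded patterns standing in for the jsonl gazetteer:

import spacy
from spacy.matcher import PhraseMatcher

nlp = spacy.blank('en')
matcher = PhraseMatcher(nlp.vocab)
# Patterns are pre-tokenized Doc objects, mirroring the {"text": ...} entries above.
patterns = [nlp(text) for text in ('Ann Arbor', 'Anchorage', 'Argentina')]
matcher.add('Phrase', None, *patterns)

doc = nlp('She moved from Anchorage to Ann Arbor last year.')
for match_id, start, end in matcher(doc):
    print(nlp.vocab.strings[match_id], doc[start:end].text)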

View File

@ -13,24 +13,29 @@ Input data:
https://www.lt.informatik.tu-darmstadt.de/fileadmin/user_upload/Group_LangTech/data/GermEval2014_complete_data.zip
Developed for: spaCy 1.7.1
Last tested for: spaCy 1.7.1
Last tested for: spaCy 2.0.0a13
'''
from __future__ import unicode_literals, print_function
import plac
from pathlib import Path
import random
import json
import tqdm
from thinc.neural.optimizers import Adam
from thinc.neural.ops import NumpyOps
import spacy.orth as orth_funcs
from spacy.vocab import Vocab
from spacy.pipeline import BeamEntityRecognizer
from spacy.pipeline import EntityRecognizer
from spacy.pipeline import TokenVectorEncoder, NeuralEntityRecognizer
from spacy.tokenizer import Tokenizer
from spacy.tokens import Doc
from spacy.attrs import *
from spacy.gold import GoldParse
from spacy.gold import _iob_to_biluo as iob_to_biluo
from spacy.gold import iob_to_biluo
from spacy.gold import minibatch
from spacy.scorer import Scorer
import spacy.util
try:
unicode
@ -38,96 +43,38 @@ except NameError:
unicode = str
spacy.util.set_env_log(True)
def init_vocab():
return Vocab(
lex_attr_getters={
LOWER: lambda string: string.lower(),
SHAPE: orth_funcs.word_shape,
NORM: lambda string: string.lower(),
PREFIX: lambda string: string[0],
SUFFIX: lambda string: string[-3:],
CLUSTER: lambda string: 0,
IS_ALPHA: orth_funcs.is_alpha,
IS_ASCII: orth_funcs.is_ascii,
IS_DIGIT: lambda string: string.isdigit(),
IS_LOWER: orth_funcs.is_lower,
IS_PUNCT: orth_funcs.is_punct,
IS_SPACE: lambda string: string.isspace(),
IS_TITLE: orth_funcs.is_title,
IS_UPPER: orth_funcs.is_upper,
IS_STOP: lambda string: False,
IS_OOV: lambda string: True
})
def save_vocab(vocab, path):
path = Path(path)
if not path.exists():
path.mkdir()
elif not path.is_dir():
raise IOError("Can't save vocab to %s\nNot a directory" % path)
with (path / 'strings.json').open('w') as file_:
vocab.strings.dump(file_)
vocab.dump((path / 'lexemes.bin').as_posix())
def load_vocab(path):
path = Path(path)
if not path.exists():
raise IOError("Cannot load vocab from %s\nDoes not exist" % path)
if not path.is_dir():
raise IOError("Cannot load vocab from %s\nNot a directory" % path)
return Vocab.load(path)
def init_ner_model(vocab, features=None):
if features is None:
features = tuple(EntityRecognizer.feature_templates)
return EntityRecognizer(vocab, features=features)
def save_ner_model(model, path):
path = Path(path)
if not path.exists():
path.mkdir()
if not path.is_dir():
raise IOError("Can't save model to %s\nNot a directory" % path)
model.model.dump((path / 'model').as_posix())
with (path / 'config.json').open('w') as file_:
data = json.dumps(model.cfg)
if not isinstance(data, unicode):
data = data.decode('utf8')
file_.write(data)
def load_ner_model(vocab, path):
return EntityRecognizer.load(path, vocab)
class Pipeline(object):
@classmethod
def load(cls, path):
path = Path(path)
if not path.exists():
raise IOError("Cannot load pipeline from %s\nDoes not exist" % path)
if not path.is_dir():
raise IOError("Cannot load pipeline from %s\nNot a directory" % path)
vocab = load_vocab(path)
tokenizer = Tokenizer(vocab, {}, None, None, None)
ner_model = load_ner_model(vocab, path / 'ner')
return cls(vocab, tokenizer, ner_model)
def __init__(self, vocab=None, tokenizer=None, entity=None):
if vocab is None:
vocab = init_vocab()
if tokenizer is None:
tokenizer = Tokenizer(vocab, {}, None, None, None)
if entity is None:
entity = init_ner_model(self.vocab)
entity = NeuralEntityRecognizer(vocab)
self.vocab = vocab
self.tokenizer = tokenizer
self.entity = entity
self.pipeline = [self.entity]
def begin_training(self):
for model in self.pipeline:
model.begin_training([])
optimizer = Adam(NumpyOps(), 0.001)
return optimizer
def __call__(self, input_):
doc = self.make_doc(input_)
for process in self.pipeline:
@ -147,14 +94,16 @@ class Pipeline(object):
gold = GoldParse(doc, entities=annotations)
return gold
def update(self, input_, annot):
doc = self.make_doc(input_)
gold = self.make_gold(input_, annot)
for ner in gold.ner:
if ner not in (None, '-', 'O'):
action, label = ner.split('-', 1)
self.entity.add_label(label)
return self.entity.update(doc, gold)
def update(self, inputs, annots, sgd, losses=None, drop=0.):
if losses is None:
losses = {}
docs = [self.make_doc(input_) for input_ in inputs]
golds = [self.make_gold(input_, annot) for input_, annot in
zip(inputs, annots)]
self.entity.update(docs, golds, drop=drop,
sgd=sgd, losses=losses)
return losses
def evaluate(self, examples):
scorer = Scorer()
@ -164,34 +113,36 @@ class Pipeline(object):
scorer.score(doc, gold)
return scorer.scores
def average_weights(self):
self.entity.model.end_training()
def save(self, path):
def to_disk(self, path):
path = Path(path)
if not path.exists():
path.mkdir()
elif not path.is_dir():
raise IOError("Can't save pipeline to %s\nNot a directory" % path)
save_vocab(self.vocab, path / 'vocab')
save_ner_model(self.entity, path / 'ner')
self.vocab.to_disk(path / 'vocab')
self.entity.to_disk(path / 'ner')
def from_disk(self, path):
path = Path(path)
if not path.exists():
raise IOError("Cannot load pipeline from %s\nDoes not exist" % path)
if not path.is_dir():
raise IOError("Cannot load pipeline from %s\nNot a directory" % path)
self.vocab = self.vocab.from_disk(path / 'vocab')
self.entity = self.entity.from_disk(path / 'ner')
def train(nlp, train_examples, dev_examples, ctx, nr_epoch=5):
next_epoch = train_examples
def train(nlp, train_examples, dev_examples, nr_epoch=5):
sgd = nlp.begin_training()
print("Iter", "Loss", "P", "R", "F")
for i in range(nr_epoch):
this_epoch = next_epoch
next_epoch = []
loss = 0
for input_, annot in this_epoch:
loss += nlp.update(input_, annot)
if (i+1) < nr_epoch:
next_epoch.append((input_, annot))
random.shuffle(next_epoch)
random.shuffle(train_examples)
losses = {}
for batch in minibatch(tqdm.tqdm(train_examples, leave=False), size=8):
inputs, annots = zip(*batch)
nlp.update(list(inputs), list(annots), sgd, losses=losses)
scores = nlp.evaluate(dev_examples)
report_scores(i, loss, scores)
nlp.average_weights()
report_scores(i, losses['ner'], scores)
scores = nlp.evaluate(dev_examples)
report_scores(channels, i+1, loss, scores)
@ -208,7 +159,8 @@ def read_examples(path):
with path.open() as file_:
sents = file_.read().strip().split('\n\n')
for sent in sents:
if not sent.strip():
sent = sent.strip()
if not sent:
continue
tokens = sent.split('\n')
while tokens and tokens[0].startswith('#'):
@ -217,28 +169,39 @@ def read_examples(path):
iob = []
for token in tokens:
if token.strip():
pieces = token.split()
pieces = token.split('\t')
words.append(pieces[1])
iob.append(pieces[2])
yield words, iob_to_biluo(iob)
def get_labels(examples):
labels = set()
for words, tags in examples:
for tag in tags:
if '-' in tag:
labels.add(tag.split('-')[1])
return sorted(labels)
@plac.annotations(
model_dir=("Path to save the model", "positional", None, Path),
train_loc=("Path to your training data", "positional", None, Path),
dev_loc=("Path to your development data", "positional", None, Path),
)
def main(model_dir=Path('/home/matt/repos/spaCy/spacy/data/de-1.0.0'),
train_loc=None, dev_loc=None, nr_epoch=30):
train_examples = read_examples(train_loc)
def main(model_dir, train_loc, dev_loc, nr_epoch=30):
print(model_dir, train_loc, dev_loc)
train_examples = list(read_examples(train_loc))
dev_examples = read_examples(dev_loc)
nlp = Pipeline.load(model_dir)
nlp = Pipeline()
for label in get_labels(train_examples):
nlp.entity.add_label(label)
print("Add label", label)
train(nlp, train_examples, list(dev_examples), ctx, nr_epoch)
train(nlp, train_examples, list(dev_examples), nr_epoch)
nlp.save(model_dir)
nlp.to_disk(model_dir)
if __name__ == '__main__':
main()
plac.call(main)
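The reader above now uses the public spacy.gold.iob_to_biluo helper to convert per-token IOB tags to BILUO before building GoldParse objects. A small illustration of that conversion; the expected output comment reflects standard BILUO semantics:

from spacy.gold import iob_to_biluo

# Multi-token B-/I- spans gain an L- end tag; single-token entities become U-.
print(iob_to_biluo(['O', 'B-LOC', 'I-LOC', 'O', 'B-PER']))
# Expected: ['O', 'B-LOC', 'L-LOC', 'O', 'U-PER']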

View File

@ -25,7 +25,7 @@ For more details, see the documentation:
* Saving and loading models: https://spacy.io/docs/usage/saving-loading
Developed for: spaCy 1.7.6
Last tested for: spaCy 1.7.6
Last updated for: spaCy 2.0.0a13
"""
from __future__ import unicode_literals, print_function
@ -34,55 +34,41 @@ from pathlib import Path
import random
import spacy
from spacy.gold import GoldParse
from spacy.tagger import Tagger
from spacy.gold import GoldParse, minibatch
from spacy.pipeline import NeuralEntityRecognizer
from spacy.pipeline import TokenVectorEncoder
def get_gold_parses(tokenizer, train_data):
'''Shuffle and create GoldParse objects'''
random.shuffle(train_data)
for raw_text, entity_offsets in train_data:
doc = tokenizer(raw_text)
gold = GoldParse(doc, entities=entity_offsets)
yield doc, gold
def train_ner(nlp, train_data, output_dir):
# Add new words to vocab
for raw_text, _ in train_data:
doc = nlp.make_doc(raw_text)
for word in doc:
_ = nlp.vocab[word.orth]
random.seed(0)
# You may need to change the learning rate. It's generally difficult to
# guess what rate you should set, especially when you have limited data.
nlp.entity.model.learn_rate = 0.001
for itn in range(1000):
random.shuffle(train_data)
loss = 0.
for raw_text, entity_offsets in train_data:
gold = GoldParse(doc, entities=entity_offsets)
# By default, the GoldParse class assumes that the entities
# described by offset are complete, and all other words should
# have the tag 'O'. You can tell it to make no assumptions
# about the tag of a word by giving it the tag '-'.
# However, this allows a trivial solution to the current
# learning problem: if words are either 'any tag' or 'ANIMAL',
# the model can learn that all words can be tagged 'ANIMAL'.
#for i in range(len(gold.ner)):
#if not gold.ner[i].endswith('ANIMAL'):
# gold.ner[i] = '-'
doc = nlp.make_doc(raw_text)
nlp.tagger(doc)
# As of 1.9, spaCy's parser now lets you supply a dropout probability
# This might help the model generalize better from only a few
# examples.
loss += nlp.entity.update(doc, gold, drop=0.9)
if loss == 0:
break
# This step averages the model's weights. This may or may not be good for
# your situation --- it's empirical.
nlp.end_training()
if output_dir:
if not output_dir.exists():
output_dir.mkdir()
nlp.save_to_directory(output_dir)
optimizer = nlp.begin_training(lambda: [])
nlp.meta['name'] = 'en_ent_animal'
for itn in range(50):
losses = {}
for batch in minibatch(get_gold_parses(nlp.make_doc, train_data), size=3):
docs, golds = zip(*batch)
nlp.update(docs, golds, losses=losses, sgd=optimizer, update_shared=True,
drop=0.35)
print(losses)
if not output_dir:
return
elif not output_dir.exists():
output_dir.mkdir()
nlp.to_disk(output_dir)
def main(model_name, output_directory=None):
print("Loading initial model", model_name)
nlp = spacy.load(model_name)
print("Creating initial model", model_name)
nlp = spacy.blank(model_name)
if output_directory is not None:
output_directory = Path(output_directory)
@ -91,6 +77,11 @@ def main(model_name, output_directory=None):
"Horses are too tall and they pretend to care about your feelings",
[(0, 6, 'ANIMAL')],
),
(
"Do they bite?",
[],
),
(
"horses are too tall and they pretend to care about your feelings",
[(0, 6, 'ANIMAL')]
@ -109,18 +100,20 @@ def main(model_name, output_directory=None):
)
]
nlp.entity.add_label('ANIMAL')
nlp.pipeline.append(TokenVectorEncoder(nlp.vocab))
nlp.pipeline.append(NeuralEntityRecognizer(nlp.vocab))
nlp.pipeline[-1].add_label('ANIMAL')
train_ner(nlp, train_data, output_directory)
# Test that the entity is recognized
doc = nlp('Do you like horses?')
text = 'Do you like horses?'
print("Ents in 'Do you like horses?':")
doc = nlp(text)
for ent in doc.ents:
print(ent.label_, ent.text)
if output_directory:
print("Loading from", output_directory)
nlp2 = spacy.load('en', path=output_directory)
nlp2.entity.add_label('ANIMAL')
nlp2 = spacy.load(output_directory)
doc2 = nlp2('Do you like horses?')
for ent in doc2.ents:
print(ent.label_, ent.text)

View File

@ -1,3 +1,7 @@
'''Train a multi-label convolutional neural network text classifier,
using the spacy.pipeline.TextCategorizer component. The model is then added
to spacy.pipeline, and predictions are available at `doc.cats`.
'''
from __future__ import unicode_literals
import plac
import random
@ -12,6 +16,11 @@ from spacy.gold import GoldParse, minibatch
from spacy.util import compounding
from spacy.pipeline import TextCategorizer
# TODO: Remove this once we're not supporting models trained with thinc <6.9.0
import thinc.neural._classes.layernorm
thinc.neural._classes.layernorm.set_compat_six_eight(False)
def train_textcat(tokenizer, textcat,
train_texts, train_cats, dev_texts, dev_cats,
@ -24,14 +33,15 @@ def train_textcat(tokenizer, textcat,
train_docs = [tokenizer(text) for text in train_texts]
train_gold = [GoldParse(doc, cats=cats) for doc, cats in
zip(train_docs, train_cats)]
train_data = zip(train_docs, train_gold)
train_data = list(zip(train_docs, train_gold))
batch_sizes = compounding(4., 128., 1.001)
for i in range(n_iter):
losses = {}
train_data = tqdm.tqdm(train_data, leave=False) # Progress bar
for batch in minibatch(train_data, size=batch_sizes):
# Progress bar and minibatching
batches = minibatch(tqdm.tqdm(train_data, leave=False), size=batch_sizes)
for batch in batches:
docs, golds = zip(*batch)
textcat.update((docs, None), golds, sgd=optimizer, drop=0.2,
textcat.update(docs, golds, sgd=optimizer, drop=0.2,
losses=losses)
with textcat.model.use_params(optimizer.averages):
scores = evaluate(tokenizer, textcat, dev_texts, dev_cats)
@ -61,12 +71,13 @@ def evaluate(tokenizer, textcat, texts, cats):
return {'textcat_p': precis, 'textcat_r': recall, 'textcat_f': fscore}
def load_data():
def load_data(limit=0):
# Partition off part of the train data --- avoid running experiments
# against test.
train_data, _ = thinc.extra.datasets.imdb()
random.shuffle(train_data)
train_data = train_data[-limit:]
texts, labels = zip(*train_data)
cats = [(['POSITIVE'] if y else []) for y in labels]
@ -86,7 +97,7 @@ def main(model_loc=None):
textcat = TextCategorizer(tokenizer.vocab, labels=['POSITIVE'])
print("Load IMDB data")
(train_texts, train_cats), (dev_texts, dev_cats) = load_data()
(train_texts, train_cats), (dev_texts, dev_cats) = load_data(limit=1000)
print("Itn.\tLoss\tP\tR\tF")
progress = '{i:d} {loss:.3f} {textcat_p:.3f} {textcat_r:.3f} {textcat_f:.3f}'

View File

@ -0,0 +1,30 @@
'''Load vectors for a language trained using FastText
https://github.com/facebookresearch/fastText/blob/master/pretrained-vectors.md
'''
from __future__ import unicode_literals
import plac
import numpy
import spacy.language
def main(vectors_loc):
nlp = spacy.language.Language()
with open(vectors_loc, 'rb') as file_:
header = file_.readline()
nr_row, nr_dim = header.split()
nlp.vocab.clear_vectors(int(nr_dim))
for line in file_:
line = line.decode('utf8')
pieces = line.split()
word = pieces[0]
vector = numpy.asarray([float(v) for v in pieces[1:]], dtype='f')
nlp.vocab.set_vector(word, vector)
doc = nlp(u'class colspan')
print(doc[0].similarity(doc[1]))
if __name__ == '__main__':
plac.call(main)
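The new example reads the fastText .vec text format: a header line with row and dimension counts, then one word followed by its vector components per line. A toy run of the same calls on a two-word in-memory table (the vector values are made up):

import io
import numpy
import spacy.language

sample = io.BytesIO(b'2 3\nclass 0.1 0.2 0.3\ncolspan 0.1 0.2 0.25\n')
nlp = spacy.language.Language()
nr_row, nr_dim = sample.readline().split()
nlp.vocab.clear_vectors(int(nr_dim))
for line in sample:
    pieces = line.decode('utf8').split()
    nlp.vocab.set_vector(pieces[0], numpy.asarray([float(v) for v in pieces[1:]], dtype='f'))
doc = nlp(u'class colspan')
print(doc[0].similarity(doc[1]))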

5
fabfile.py vendored
View File

@ -14,6 +14,7 @@ VENV_DIR = path.join(PWD, ENV)
def env(lang='python2.7'):
if path.exists(VENV_DIR):
local('rm -rf {env}'.format(env=VENV_DIR))
local('pip install virtualenv')
local('python -m virtualenv -p {lang} {env}'.format(lang=lang, env=VENV_DIR))
@ -32,6 +33,10 @@ def make():
local('pip install -r requirements.txt')
local('python setup.py build_ext --inplace')
def sdist():
with virtualenv(VENV_DIR):
with lcd(path.dirname(__file__)):
local('python setup.py sdist')
def clean():
with lcd(path.dirname(__file__)):

View File

@ -1,9 +1,9 @@
cython<0.24
cython>=0.24,<0.27.0
pathlib
numpy>=1.7
cymem>=1.30,<1.32
preshed>=1.0.0,<2.0.0
thinc>=6.8.0,<6.9.0
thinc>=6.9.0,<6.10.0
murmurhash>=0.28,<0.29
plac<1.0.0,>=0.9.6
six
@ -13,7 +13,7 @@ requests>=2.13.0,<3.0.0
regex==2017.4.5
ftfy>=4.4.2,<5.0.0
pytest>=3.0.6,<4.0.0
pip>=9.0.0,<10.0.0
mock>=2.0.0,<3.0.0
msgpack-python
msgpack-numpy
html5lib==1.0b8

View File

@ -195,9 +195,8 @@ def setup_package():
'murmurhash>=0.28,<0.29',
'cymem>=1.30,<1.32',
'preshed>=1.0.0,<2.0.0',
'thinc>=6.8.0,<6.9.0',
'thinc>=6.9.0,<6.10.0',
'plac<1.0.0,>=0.9.6',
'pip>=9.0.0,<10.0.0',
'six',
'pathlib',
'ujson>=1.35',

View File

@ -4,11 +4,13 @@ from __future__ import unicode_literals
from .cli.info import info as cli_info
from .glossary import explain
from .deprecated import resolve_load_name
#from .about import __version__
from .about import __version__
from . import util
def load(name, **overrides):
from .deprecated import resolve_load_name
name = resolve_load_name(name, **overrides)
return util.load_model(name, **overrides)

View File

@ -7,7 +7,7 @@ if __name__ == '__main__':
import plac
import sys
from spacy.cli import download, link, info, package, train, convert, model
from spacy.cli import profile
from spacy.cli import profile, evaluate
from spacy.util import prints
commands = {
@ -15,6 +15,7 @@ if __name__ == '__main__':
'link': link,
'info': info,
'train': train,
'evaluate': evaluate,
'convert': convert,
'package': package,
'model': model,

View File

@ -1,28 +1,27 @@
import ujson
from thinc.v2v import Model, Maxout, Softmax, Affine, ReLu, SELU
from thinc.i2v import HashEmbed, StaticVectors
from thinc.t2t import ExtractWindow, ParametricAttention
from thinc.t2v import Pooling, max_pool, mean_pool, sum_pool
from thinc.misc import Residual
from thinc.misc import BatchNorm as BN
from thinc.misc import LayerNorm as LN
from thinc.api import add, layerize, chain, clone, concatenate, with_flatten
from thinc.neural import Model, Maxout, Softmax, Affine
from thinc.neural._classes.hash_embed import HashEmbed
from thinc.api import FeatureExtracter, with_getitem
from thinc.api import uniqued, wrap, flatten_add_lengths, noop
from thinc.linear.linear import LinearModel
from thinc.neural.ops import NumpyOps, CupyOps
from thinc.neural.util import get_array_module
import random
import cytoolz
from thinc.neural._classes.convolution import ExtractWindow
from thinc.neural._classes.static_vectors import StaticVectors
from thinc.neural._classes.batchnorm import BatchNorm as BN
from thinc.neural._classes.layernorm import LayerNorm as LN
from thinc.neural._classes.resnet import Residual
from thinc.neural import ReLu
from thinc.neural._classes.selu import SELU
from thinc import describe
from thinc.describe import Dimension, Synapses, Biases, Gradient
from thinc.neural._classes.affine import _set_dimensions_if_needed
from thinc.api import FeatureExtracter, with_getitem
from thinc.neural.pooling import Pooling, max_pool, mean_pool, sum_pool
from thinc.neural._classes.attention import ParametricAttention
from thinc.linear.linear import LinearModel
from thinc.api import uniqued, wrap, flatten_add_lengths
import thinc.extra.load_nlp
from .attrs import ID, ORTH, LOWER, NORM, PREFIX, SUFFIX, SHAPE, TAG, DEP, CLUSTER
from .tokens.doc import Doc
@ -31,6 +30,11 @@ from . import util
import numpy
import io
# TODO: Unset this once we don't want to support models previous models.
import thinc.neural._classes.layernorm
thinc.neural._classes.layernorm.set_compat_six_eight(True)
VECTORS_KEY = 'spacy_pretrained_vectors'
@layerize
def _flatten_add_lengths(seqs, pad=0, drop=0.):
@ -225,33 +229,80 @@ def drop_layer(layer, factor=2.):
model.predict = layer
return model
def link_vectors_to_models(vocab):
vectors = vocab.vectors
ops = Model.ops
for word in vocab:
if word.orth in vectors.key2row:
word.rank = vectors.key2row[word.orth]
else:
word.rank = 0
data = ops.asarray(vectors.data)
# Set an entry here, so that vectors are accessed by StaticVectors
# (unideal, I know)
thinc.extra.load_nlp.VECTORS[(ops.device, VECTORS_KEY)] = data
def Tok2Vec(width, embed_size, preprocess=None):
def Tok2Vec(width, embed_size, **kwargs):
pretrained_dims = kwargs.get('pretrained_dims', 0)
cnn_maxout_pieces = kwargs.get('cnn_maxout_pieces', 3)
cols = [ID, NORM, PREFIX, SUFFIX, SHAPE, ORTH]
with Model.define_operators({'>>': chain, '|': concatenate, '**': clone, '+': add}):
with Model.define_operators({'>>': chain, '|': concatenate, '**': clone, '+': add,
'*': reapply}):
norm = HashEmbed(width, embed_size, column=cols.index(NORM), name='embed_norm')
prefix = HashEmbed(width, embed_size//2, column=cols.index(PREFIX), name='embed_prefix')
suffix = HashEmbed(width, embed_size//2, column=cols.index(SUFFIX), name='embed_suffix')
shape = HashEmbed(width, embed_size//2, column=cols.index(SHAPE), name='embed_shape')
if pretrained_dims is not None and pretrained_dims >= 1:
glove = StaticVectors(VECTORS_KEY, width, column=cols.index(ID))
embed = (norm | prefix | suffix | shape ) >> LN(Maxout(width, width*4, pieces=3))
tok2vec = (
with_flatten(
asarray(Model.ops, dtype='uint64')
>> uniqued(embed, column=5)
>> Residual(
(ExtractWindow(nW=1) >> LN(Maxout(width, width*3)))
) ** 4, pad=4
)
embed = uniqued(
(glove | norm | prefix | suffix | shape)
>> LN(Maxout(width, width*5, pieces=3)), column=5)
else:
embed = uniqued(
(norm | prefix | suffix | shape)
>> LN(Maxout(width, width*4, pieces=3)), column=5)
convolution = Residual(
ExtractWindow(nW=1)
>> LN(Maxout(width, width*3, pieces=cnn_maxout_pieces))
)
if preprocess not in (False, None):
tok2vec = preprocess >> tok2vec
tok2vec = (
FeatureExtracter(cols)
>> with_flatten(
embed >> (convolution ** 4), pad=4)
)
# Work around thinc API limitations :(. TODO: Revise in Thinc 7
tok2vec.nO = width
tok2vec.embed = embed
return tok2vec
def reapply(layer, n_times):
def reapply_fwd(X, drop=0.):
backprops = []
for i in range(n_times):
Y, backprop = layer.begin_update(X, drop=drop)
X = Y
backprops.append(backprop)
def reapply_bwd(dY, sgd=None):
dX = None
for backprop in reversed(backprops):
dY = backprop(dY, sgd=sgd)
if dX is None:
dX = dY
else:
dX += dY
return dX
return Y, reapply_bwd
return wrap(reapply_fwd, layer)
def asarray(ops, dtype):
def forward(X, drop=0.):
return ops.asarray(X, dtype=dtype), None
@ -455,20 +506,25 @@ def getitem(i):
return X[i], None
return layerize(getitem_fwd)
def build_tagger_model(nr_class, token_vector_width, **cfg):
embed_size = util.env_opt('embed_size', 7500)
def build_tagger_model(nr_class, **cfg):
embed_size = util.env_opt('embed_size', 7000)
if 'token_vector_width' in cfg:
token_vector_width = cfg['token_vector_width']
else:
token_vector_width = util.env_opt('token_vector_width', 128)
pretrained_dims = cfg.get('pretrained_dims', 0)
with Model.define_operators({'>>': chain, '+': add}):
# Input: (doc, tensor) tuples
private_tok2vec = Tok2Vec(token_vector_width, embed_size, preprocess=doc2feats())
if 'tok2vec' in cfg:
tok2vec = cfg['tok2vec']
else:
tok2vec = Tok2Vec(token_vector_width, embed_size,
pretrained_dims=pretrained_dims)
model = (
fine_tune(private_tok2vec)
>> with_flatten(
Maxout(token_vector_width, token_vector_width)
>> Softmax(nr_class, token_vector_width)
)
tok2vec
>> with_flatten(Softmax(nr_class, token_vector_width))
)
model.nI = None
model.tok2vec = tok2vec
return model
@ -514,6 +570,7 @@ def foreach(layer, drop_factor=1.0):
def build_text_classifier(nr_class, width=64, **cfg):
nr_vector = cfg.get('nr_vector', 5000)
pretrained_dims = cfg.get('pretrained_dims', 0)
with Model.define_operators({'>>': chain, '+': add, '|': concatenate,
'**': clone}):
if cfg.get('low_data'):
@ -521,7 +578,7 @@ def build_text_classifier(nr_class, width=64, **cfg):
SpacyVectors
>> flatten_add_lengths
>> with_getitem(0,
Affine(width, 300)
Affine(width, pretrained_dims)
)
>> ParametricAttention(width)
>> Pooling(sum_pool)
@ -548,18 +605,24 @@ def build_text_classifier(nr_class, width=64, **cfg):
)
)
static_vectors = (
SpacyVectors
>> with_flatten(Affine(width, 300))
)
cnn_model = (
if pretrained_dims:
static_vectors = (
SpacyVectors
>> with_flatten(Affine(width, pretrained_dims))
)
# TODO Make concatenate support lists
concatenate_lists(trained_vectors, static_vectors)
vectors = concatenate_lists(trained_vectors, static_vectors)
vectors_width = width*2
else:
vectors = trained_vectors
vectors_width = width
static_vectors = None
cnn_model = (
vectors
>> with_flatten(
LN(Maxout(width, width*2))
LN(Maxout(width, vectors_width))
>> Residual(
(ExtractWindow(nW=1) >> zero_init(Maxout(width, width*3)))
(ExtractWindow(nW=1) >> LN(Maxout(width, width*3)))
) ** 2, pad=2
)
>> flatten_add_lengths
@ -579,7 +642,7 @@ def build_text_classifier(nr_class, width=64, **cfg):
>> zero_init(Affine(nr_class, nr_class*2, drop_factor=0.0))
>> logistic
)
model.nO = nr_class
model.lsuv = False
return model
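Tok2Vec and build_text_classifier above assemble their networks with thinc's operator overloading: inside Model.define_operators, '>>' means chain, '|' means concatenate and '**' means clone. A minimal standalone illustration of the idiom with made-up layer sizes, not one of the models defined in this file:

from thinc.api import chain, concatenate
from thinc.v2v import Model, Maxout, Softmax

with Model.define_operators({'>>': chain, '|': concatenate}):
    # Two 64-wide Maxout columns over a 128-dim input are concatenated to 128
    # features, then fed to a 3-class softmax.
    model = (Maxout(64, 128) | Maxout(64, 128)) >> Softmax(3, 128)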

View File

@ -3,14 +3,15 @@
# https://github.com/pypa/warehouse/blob/master/warehouse/__about__.py
__title__ = 'spacy-nightly'
__version__ = '2.0.0a13'
__version__ = '2.0.0a16'
__summary__ = 'Industrial-strength Natural Language Processing (NLP) with Python and Cython'
__uri__ = 'https://spacy.io'
__author__ = 'Explosion AI'
__email__ = 'contact@explosion.ai'
__license__ = 'MIT'
__release__ = True
__docs_models__ = 'https://spacy.io/docs/usage/models'
__docs_models__ = 'https://alpha.spacy.io/usage/models'
__download_url__ = 'https://github.com/explosion/spacy-models/releases/download'
__compatibility__ = 'https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json'
__shortcuts__ = 'https://raw.githubusercontent.com/explosion/spacy-models/master/shortcuts.json'

View File

@ -1,5 +1,5 @@
# Reserve 64 values for flag features
cpdef enum attr_id_t:
cdef enum attr_id_t:
NULL_ATTR
IS_ALPHA
IS_ASCII

View File

@ -94,6 +94,7 @@ IDS = {
# ATTR IDs, in order of the symbol
NAMES = [key for key, value in sorted(IDS.items(), key=lambda item: item[1])]
locals().update(IDS)
def intify_attrs(stringy_attrs, strings_map=None, _do_deprecated=False):

View File

@ -4,5 +4,6 @@ from .link import link
from .package import package
from .profile import profile
from .train import train
from .evaluate import evaluate
from .convert import convert
from .model import model

View File

@ -14,7 +14,7 @@ from ..util import prints
CONVERTERS = {
'.conllu': conllu2json,
'.conll': conllu2json,
'.iob': iob2json
'.iob': iob2json,
}

View File

@ -1,5 +1,6 @@
# coding: utf8
from __future__ import unicode_literals
from cytoolz import partition_all, concat
from ...compat import json_dumps, path2str
from ...util import prints
@ -10,11 +11,9 @@ def iob2json(input_path, output_path, n_sents=10, *a, **k):
"""
Convert IOB files into JSON format for use with train cli.
"""
# TODO: This isn't complete yet -- need to map from IOB to
# BILUO
with input_path.open('r', encoding='utf8') as file_:
docs = read_iob(file_)
sentences = read_iob(file_)
docs = merge_sentences(sentences, n_sents)
output_filename = input_path.parts[-1].replace(".iob", ".json")
output_file = output_path / output_filename
with output_file.open('w', encoding='utf-8') as f:
@ -23,9 +22,9 @@ def iob2json(input_path, output_path, n_sents=10, *a, **k):
title="Generated output file %s" % path2str(output_file))
def read_iob(file_):
def read_iob(raw_sents):
sentences = []
for line in file_:
for line in raw_sents:
if not line.strip():
continue
tokens = [t.split('|') for t in line.split()]
@ -43,3 +42,15 @@ def read_iob(file_):
paragraphs = [{'sentences': [sent]} for sent in sentences]
docs = [{'id': 0, 'paragraphs': [para]} for para in paragraphs]
return docs
def merge_sentences(docs, n_sents):
counter = 0
merged = []
for group in partition_all(n_sents, docs):
group = list(group)
first = group.pop(0)
to_extend = first['paragraphs'][0]['sentences']
for sent in group[1:]:
to_extend.extend(sent['paragraphs'][0]['sentences'])
merged.append(first)
return merged
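merge_sentences groups the per-sentence documents produced by read_iob into documents of n_sents sentences each, with cytoolz.partition_all doing the grouping. What that grouping does on toy data:

from cytoolz import partition_all

sents = ['s1', 's2', 's3', 's4', 's5']
# Groups of n_sents (here 2); the final group may be shorter.
print([list(group) for group in partition_all(2, sents)])
# Expected: [['s1', 's2'], ['s3', 's4'], ['s5']]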

119
spacy/cli/evaluate.py Normal file
View File

@ -0,0 +1,119 @@
# coding: utf8
from __future__ import unicode_literals, division, print_function
import plac
import json
from collections import defaultdict
import cytoolz
from pathlib import Path
import dill
import tqdm
from thinc.neural._classes.model import Model
from thinc.neural.optimizers import linear_decay
from timeit import default_timer as timer
import random
import numpy.random
from ..tokens.doc import Doc
from ..scorer import Scorer
from ..gold import GoldParse, merge_sents
from ..gold import GoldCorpus, minibatch
from ..util import prints
from .. import util
from .. import about
from .. import displacy
from ..compat import json_dumps
random.seed(0)
numpy.random.seed(0)
@plac.annotations(
model=("Model name or path", "positional", None, str),
data_path=("Location of JSON-formatted evaluation data", "positional", None, str),
gold_preproc=("Use gold preprocessing", "flag", "G", bool),
gpu_id=("Use GPU", "option", "g", int),
displacy_path=("Directory to output rendered parses as HTML", "option", "dp", str),
displacy_limit=("Limit of parses to render as HTML", "option", "dl", int)
)
def evaluate(cmd, model, data_path, gpu_id=-1, gold_preproc=False,
displacy_path=None, displacy_limit=25):
"""
Evaluate a model. To render a sample of parses in a HTML file, set an output
directory as the displacy_path argument.
"""
util.use_gpu(gpu_id)
util.set_env_log(False)
data_path = util.ensure_path(data_path)
displacy_path = util.ensure_path(displacy_path)
if not data_path.exists():
prints(data_path, title="Evaluation data not found", exits=1)
if displacy_path and not displacy_path.exists():
prints(displacy_path, title="Visualization output directory not found", exits=1)
corpus = GoldCorpus(data_path, data_path)
nlp = util.load_model(model)
dev_docs = list(corpus.dev_docs(nlp, gold_preproc=gold_preproc))
begin = timer()
scorer = nlp.evaluate(dev_docs, verbose=False)
end = timer()
nwords = sum(len(doc_gold[0]) for doc_gold in dev_docs)
print_results(scorer, time=end - begin, words=nwords,
wps=nwords / (end - begin))
if displacy_path:
docs, golds = zip(*dev_docs)
render_deps = 'parser' in nlp.meta.get('pipeline', [])
render_ents = 'ner' in nlp.meta.get('pipeline', [])
render_parses(docs, displacy_path, model_name=model, limit=displacy_limit,
deps=render_deps, ents=render_ents)
prints(displacy_path, title="Generated %s parses as HTML" % displacy_limit)
def render_parses(docs, output_path, model_name='', limit=250, deps=True, ents=True):
docs[0].user_data['title'] = model_name
if ents:
with (output_path / 'entities.html').open('w') as file_:
html = displacy.render(docs[:limit], style='ent', page=True)
file_.write(html)
if deps:
with (output_path / 'parses.html').open('w') as file_:
html = displacy.render(docs[:limit], style='dep', page=True, options={'compact': True})
file_.write(html)
def print_progress(itn, losses, dev_scores, wps=0.0):
scores = {}
for col in ['dep_loss', 'tag_loss', 'uas', 'tags_acc', 'token_acc',
'ents_p', 'ents_r', 'ents_f', 'wps']:
scores[col] = 0.0
scores['dep_loss'] = losses.get('parser', 0.0)
scores['ner_loss'] = losses.get('ner', 0.0)
scores['tag_loss'] = losses.get('tagger', 0.0)
scores.update(dev_scores)
scores['wps'] = wps
tpl = '\t'.join((
'{:d}',
'{dep_loss:.3f}',
'{ner_loss:.3f}',
'{uas:.3f}',
'{ents_p:.3f}',
'{ents_r:.3f}',
'{ents_f:.3f}',
'{tags_acc:.3f}',
'{token_acc:.3f}',
'{wps:.1f}'))
print(tpl.format(itn, **scores))
def print_results(scorer, time, words, wps):
results = {
'Time': '%.2f s' % time,
'Words': words,
'Words/s': '%.0f' % wps,
'TOK': '%.2f' % scorer.token_acc,
'POS': '%.2f' % scorer.tags_acc,
'UAS': '%.2f' % scorer.uas,
'LAS': '%.2f' % scorer.las,
'NER P': '%.2f' % scorer.ents_p,
'NER R': '%.2f' % scorer.ents_r,
'NER F': '%.2f' % scorer.ents_f}
util.print_table(results, title="Results")

View File

@ -105,8 +105,11 @@ def generate_pipeline():
"parser, ner. For more information, see the docs on processing pipelines.",
title="Enter your model's pipeline components")
pipeline = util.get_raw_input("Pipeline components", True)
replace = {'True': True, 'False': False}
return replace[pipeline] if pipeline in replace else pipeline.split(', ')
subs = {'True': True, 'False': False}
if pipeline in subs:
return subs[pipeline]
else:
return [p.strip() for p in pipeline.split(',')]
def validate_meta(meta, keys):

View File

@ -8,8 +8,11 @@ import cytoolz
from pathlib import Path
import dill
import tqdm
from thinc.neural._classes.model import Model
from thinc.neural.optimizers import linear_decay
from timeit import default_timer as timer
import random
import numpy.random
from ..tokens.doc import Doc
from ..scorer import Scorer
@ -17,9 +20,13 @@ from ..gold import GoldParse, merge_sents
from ..gold import GoldCorpus, minibatch
from ..util import prints
from .. import util
from .. import about
from .. import displacy
from ..compat import json_dumps
random.seed(0)
numpy.random.seed(0)
@plac.annotations(
lang=("model language", "positional", None, str),
@ -29,15 +36,17 @@ from ..compat import json_dumps
n_iter=("number of iterations", "option", "n", int),
n_sents=("number of sentences", "option", "ns", int),
use_gpu=("Use GPU", "option", "g", int),
resume=("Whether to resume training", "flag", "R", bool),
vectors=("Model to load vectors from", "option", "v"),
no_tagger=("Don't train tagger", "flag", "T", bool),
no_parser=("Don't train parser", "flag", "P", bool),
no_entities=("Don't train NER", "flag", "N", bool),
gold_preproc=("Use gold preprocessing", "flag", "G", bool),
version=("Model version", "option", "V", str),
meta_path=("Optional path to meta.json. All relevant properties will be overwritten.", "option", "m", Path)
)
def train(cmd, lang, output_dir, train_data, dev_data, n_iter=20, n_sents=0,
use_gpu=-1, resume=False, no_tagger=False, no_parser=False, no_entities=False,
gold_preproc=False):
def train(cmd, lang, output_dir, train_data, dev_data, n_iter=10, n_sents=0,
use_gpu=-1, vectors=None, no_tagger=False, no_parser=False, no_entities=False,
gold_preproc=False, version="0.0.0", meta_path=None):
"""
Train a model. Expects data in spaCy's JSON format.
"""
@ -46,19 +55,24 @@ def train(cmd, lang, output_dir, train_data, dev_data, n_iter=20, n_sents=0,
output_path = util.ensure_path(output_dir)
train_path = util.ensure_path(train_data)
dev_path = util.ensure_path(dev_data)
meta_path = util.ensure_path(meta_path)
if not output_path.exists():
output_path.mkdir()
if not train_path.exists():
prints(train_path, title="Training data not found", exits=1)
if dev_path and not dev_path.exists():
prints(dev_path, title="Development data not found", exits=1)
if meta_path is not None and not meta_path.exists():
prints(meta_path, title="meta.json not found", exits=1)
meta = util.read_json(meta_path) if meta_path else {}
if not isinstance(meta, dict):
prints("Expected dict but got: {}".format(type(meta)),
title="Not a valid meta.json format", exits=1)
lang_class = util.get_lang_class(lang)
pipeline = ['token_vectors', 'tags', 'dependencies', 'entities']
if no_tagger and 'tags' in pipeline: pipeline.remove('tags')
if no_parser and 'dependencies' in pipeline: pipeline.remove('dependencies')
if no_entities and 'entities' in pipeline: pipeline.remove('entities')
pipeline = ['tagger', 'parser', 'ner']
if no_tagger and 'tagger' in pipeline: pipeline.remove('tagger')
if no_parser and 'parser' in pipeline: pipeline.remove('parser')
if no_entities and 'ner' in pipeline: pipeline.remove('ner')
# Take dropout and batch size as generators of values -- dropout
# starts high and decays sharply, to force the optimizer to explore.
@ -68,33 +82,30 @@ def train(cmd, lang, output_dir, train_data, dev_data, n_iter=20, n_sents=0,
util.env_opt('dropout_to', 0.2),
util.env_opt('dropout_decay', 0.0))
batch_sizes = util.compounding(util.env_opt('batch_from', 1),
util.env_opt('batch_to', 64),
util.env_opt('batch_to', 16),
util.env_opt('batch_compound', 1.001))
if resume:
prints(output_path / 'model9.pickle', title="Resuming training")
nlp = dill.load((output_path / 'model9.pickle').open('rb'))
else:
nlp = lang_class(pipeline=pipeline)
corpus = GoldCorpus(train_path, dev_path, limit=n_sents)
n_train_words = corpus.count_train()
lang_class = util.get_lang_class(lang)
nlp = lang_class(pipeline=pipeline)
if vectors:
util.load_model(vectors, vocab=nlp.vocab)
optimizer = nlp.begin_training(lambda: corpus.train_tuples, device=use_gpu)
nlp._optimizer = None
print("Itn.\tLoss\tUAS\tNER P.\tNER R.\tNER F.\tTag %\tToken %")
print("Itn.\tP.Loss\tN.Loss\tUAS\tNER P.\tNER R.\tNER F.\tTag %\tToken %")
try:
train_docs = corpus.train_docs(nlp, projectivize=True, noise_level=0.0,
gold_preproc=gold_preproc, max_length=0)
train_docs = list(train_docs)
for i in range(n_iter):
if resume:
i += 20
with tqdm.tqdm(total=n_train_words, leave=False) as pbar:
train_docs = corpus.train_docs(nlp, projectivize=True, noise_level=0.0,
gold_preproc=gold_preproc, max_length=0)
losses = {}
for batch in minibatch(train_docs, size=batch_sizes):
docs, golds = zip(*batch)
nlp.update(docs, golds, sgd=optimizer,
drop=next(dropout_rates), losses=losses,
update_shared=True)
drop=next(dropout_rates), losses=losses)
pbar.update(sum(len(doc) for doc in docs))
with nlp.use_params(optimizer.averages):
@ -104,12 +115,22 @@ def train(cmd, lang, output_dir, train_data, dev_data, n_iter=20, n_sents=0,
nlp_loaded = lang_class(pipeline=pipeline)
nlp_loaded = nlp_loaded.from_disk(epoch_model_path)
scorer = nlp_loaded.evaluate(
corpus.dev_docs(
list(corpus.dev_docs(
nlp_loaded,
gold_preproc=gold_preproc))
gold_preproc=gold_preproc)))
acc_loc =(output_path / ('model%d' % i) / 'accuracy.json')
with acc_loc.open('w') as file_:
file_.write(json_dumps(scorer.scores))
meta_loc = output_path / ('model%d' % i) / 'meta.json'
meta['accuracy'] = scorer.scores
meta['lang'] = nlp.lang
meta['pipeline'] = pipeline
meta['spacy_version'] = '>=%s' % about.__version__
meta.setdefault('name', 'model%d' % i)
meta.setdefault('version', version)
with meta_loc.open('w') as file_:
file_.write(json_dumps(meta))
util.set_env_log(True)
print_progress(i, losses, scorer.scores)
finally:
@ -138,12 +159,14 @@ def print_progress(itn, losses, dev_scores, wps=0.0):
'ents_p', 'ents_r', 'ents_f', 'wps']:
scores[col] = 0.0
scores['dep_loss'] = losses.get('parser', 0.0)
scores['ner_loss'] = losses.get('ner', 0.0)
scores['tag_loss'] = losses.get('tagger', 0.0)
scores.update(dev_scores)
scores['wps'] = wps
tpl = '\t'.join((
'{:d}',
'{dep_loss:.3f}',
'{ner_loss:.3f}',
'{uas:.3f}',
'{ents_p:.3f}',
'{ents_r:.3f}',

View File

@ -7,6 +7,7 @@ import re
import ujson
import random
import cytoolz
import itertools
from .syntax import nonproj
from .util import ensure_path
@ -146,9 +147,13 @@ def minibatch(items, size=8):
'''Iterate over batches of items. `size` may be an iterator,
so that batch-size can vary on each step.
'''
if isinstance(size, int):
size_ = itertools.repeat(8)
else:
size_ = size
items = iter(items)
while True:
batch_size = next(size) #if hasattr(size, '__next__') else size
batch_size = next(size_)
batch = list(cytoolz.take(int(batch_size), items))
if len(batch) == 0:
break
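minibatch now accepts either an int or a generator for size; the train CLI passes util.compounding so batch sizes grow over training. A small illustration of both call styles (note that, as written above, the integer branch repeats a fixed size of 8 rather than the value passed in):

from spacy.gold import minibatch
from spacy.util import compounding

items = list(range(20))
print([len(batch) for batch in minibatch(items, size=8)])
# Growing batch sizes, as spacy/cli/train.py uses them:
print([len(batch) for batch in minibatch(items, size=compounding(1., 8., 1.5))])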

View File

@ -29,9 +29,9 @@ _units = ('km km² km³ m m² m³ dm dm² dm³ cm cm² cm³ mm mm² mm³ ha µm
'kg g mg µg t lb oz m/s km/h kmh mph hPa Pa mbar mb MB kb KB gb GB tb '
'TB T G M K %')
_currency = r'\$ £ € ¥ ฿ US\$ C\$ A\$'
_punct = r', : ; \! \? ¿ ¡ \( \) \[ \] \{ \} < > _ # \* &'
_punct = r'…… , : ; \! \? ¿ ¡ \( \) \[ \] \{ \} < > _ # \* & ·'
_quotes = r'\' \'\' " ” “ `` ` ´ , „ » «'
_hyphens = '- — -- ---'
_hyphens = '- — -- --- —— ~'
_other_symbols = r'[\p{So}]'
UNITS = merge_chars(_units)

View File

@ -3,6 +3,7 @@ from __future__ import unicode_literals
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
from .norm_exceptions import NORM_EXCEPTIONS
from .punctuation import TOKENIZER_INFIXES
from .tag_map import TAG_MAP
from .stop_words import STOP_WORDS
from .lemmatizer import LOOKUP
@ -23,6 +24,7 @@ class GermanDefaults(Language.Defaults):
NORM_EXCEPTIONS, BASE_NORMS)
tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
infixes = tuple(TOKENIZER_INFIXES)
tag_map = dict(TAG_MAP)
stop_words = set(STOP_WORDS)
syntax_iterators = dict(SYNTAX_ITERATORS)

View File

@ -0,0 +1,20 @@
# coding: utf8
from __future__ import unicode_literals
from ..char_classes import LIST_ELLIPSES, LIST_ICONS
from ..char_classes import QUOTES, ALPHA, ALPHA_LOWER, ALPHA_UPPER
_quotes = QUOTES.replace("'", '')
_infixes = (LIST_ELLIPSES + LIST_ICONS +
[r'(?<=[{}])\.(?=[{}])'.format(ALPHA_LOWER, ALPHA_UPPER),
r'(?<=[{a}])[,!?](?=[{a}])'.format(a=ALPHA),
r'(?<=[{a}"])[:<>=](?=[{a}])'.format(a=ALPHA),
r'(?<=[{a}]),(?=[{a}])'.format(a=ALPHA),
r'(?<=[{a}])([{q}\)\]\(\[])(?=[\{a}])'.format(a=ALPHA, q=_quotes),
r'(?<=[{a}])--(?=[{a}])'.format(a=ALPHA),
r'(?<=[0-9])-(?=[0-9])'])
TOKENIZER_INFIXES = _infixes

View File

@ -4,6 +4,7 @@ from __future__ import unicode_literals
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS, TOKEN_MATCH
from .punctuation import TOKENIZER_SUFFIXES, TOKENIZER_INFIXES
from .stop_words import STOP_WORDS
from .lex_attrs import LEX_ATTRS
from .lemmatizer import LOOKUP
from .syntax_iterators import SYNTAX_ITERATORS
@ -17,6 +18,7 @@ from ...util import update_exc, add_lookups
class FrenchDefaults(Language.Defaults):
lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
lex_attr_getters.update(LEX_ATTRS)
lex_attr_getters[LANG] = lambda text: 'fr'
lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS)

View File

@ -0,0 +1,41 @@
# coding: utf8
from __future__ import unicode_literals
from ...attrs import LIKE_NUM
_num_words = set("""
zero un deux trois quatre cinq six sept huit neuf dix
onze douze treize quatorze quinze seize dix-sept dix-huit dix-neuf
vingt trente quanrante cinquante soixante septante quatre-vingt huitante nonante
cent mille mil million milliard billion quadrillion quintillion
sextillion septillion octillion nonillion decillion
""".split())
_ordinal_words = set("""
premier deuxième second troisième quatrième cinquième sixième septième huitième neuvième dixième
onzième douzième treizième quatorzième quinzième seizième dix-septième dix-huitième dix-neufième
vingtième trentième quanrantième cinquantième soixantième septantième quatre-vingtième huitantième nonantième
centième millième millionnième milliardième billionnième quadrillionnième quintillionnième
sextillionnième septillionnième octillionnième nonillionnième decillionnième
""".split())
def like_num(text):
# Might require more work?
# See this discussion: https://github.com/explosion/spaCy/pull/1161
text = text.replace(',', '').replace('.', '')
if text.isdigit():
return True
if text.count('/') == 1:
num, denom = text.split('/')
if num.isdigit() and denom.isdigit():
return True
if text in _num_words:
return True
return False
LEX_ATTRS = {
LIKE_NUM: like_num
}
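
As a quick illustration of the new French LIKE_NUM getter, the sketch below (an editor's example, not part of the commit) runs a few strings through like_num; it assumes spaCy is installed with this change applied, so that spacy.lang.fr.lex_attrs is importable.

from spacy.lang.fr.lex_attrs import like_num

for word in ['deux', 'dix-sept', '3,5', '1/2', 'deuxième', 'chat']:
    print(word, like_num(word))
# 'deux', 'dix-sept', '3,5' and '1/2' are recognised as number-like;
# 'deuxième' is not, because _ordinal_words is defined but not yet
# consulted by like_num, and 'chat' is an ordinary noun.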

View File

@ -2,6 +2,7 @@
from __future__ import unicode_literals
from .stop_words import STOP_WORDS
from .lex_attrs import LEX_ATTRS
from ..tokenizer_exceptions import BASE_EXCEPTIONS
from ..norm_exceptions import BASE_NORMS
@ -12,6 +13,7 @@ from ...util import update_exc, add_lookups
class DutchDefaults(Language.Defaults):
lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
lex_attr_getters.update(LEX_ATTRS)
lex_attr_getters[LANG] = lambda text: 'nl'
lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS)

View File

@ -0,0 +1,40 @@
# coding: utf8
from __future__ import unicode_literals
from ...attrs import LIKE_NUM
_num_words = set("""
nul een één twee drie vier vijf zes zeven acht negen tien elf twaalf dertien
veertien twintig dertig veertig vijftig zestig zeventig tachtig negentig honderd
duizend miljoen miljard biljoen biljard triljoen triljard
""".split())
_ordinal_words = set("""
eerste tweede derde vierde vijfde zesde zevende achtste negende tiende elfde
twaalfde dertiende veertiende twintigste dertigste veertigste vijftigste
zestigste zeventigste tachtigste negentigste honderdste duizendste miljoenste
miljardste biljoenste biljardste triljoenste triljardste
""".split())
def like_num(text):
# This only does the most basic check for whether a token is a digit
# or matches one of the number words. In order to handle numbers like
# "drieëntwintig", more work is required.
# See this discussion: https://github.com/explosion/spaCy/pull/1177
text = text.replace(',', '').replace('.', '')
if text.isdigit():
return True
if text.count('/') == 1:
num, denom = text.split('/')
if num.isdigit() and denom.isdigit():
return True
if text in _num_words:
return True
return False
LEX_ATTRS = {
LIKE_NUM: like_num
}

35
spacy/lang/th/__init__.py Normal file
View File

@ -0,0 +1,35 @@
# coding: utf8
from __future__ import unicode_literals
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
from .tag_map import TAG_MAP
from .stop_words import STOP_WORDS
from ..tokenizer_exceptions import BASE_EXCEPTIONS
from ...tokens import Doc
from ..norm_exceptions import BASE_NORMS
from ...language import Language
from ...attrs import LANG, NORM
from ...util import update_exc, add_lookups
class ThaiDefaults(Language.Defaults):
lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
lex_attr_getters[LANG] = lambda text: 'th'
tokenizer_exceptions = TOKENIZER_EXCEPTIONS
tag_map = dict(TAG_MAP)
stop_words = set(STOP_WORDS)
class Thai(Language):
lang = 'th'
Defaults = ThaiDefaults
def make_doc(self, text):
try:
from pythainlp.tokenize import word_tokenize
except ImportError:
raise ImportError("The Thai tokenizer requires the PyThaiNLP library: "
"https://github.com/wannaphongcom/pythainlp/")
words = [x for x in list(word_tokenize(text,"newmm"))]
return Doc(self.vocab, words=words, spaces=[False]*len(words))
__all__ = ['Thai']
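
A minimal usage sketch for the new Thai class (an editor's example, not part of the commit); it assumes PyThaiNLP is installed and calls make_doc directly, exactly as defined above.

from spacy.lang.th import Thai

nlp = Thai()
# Thai has no whitespace between words, so make_doc delegates to
# PyThaiNLP's "newmm" word segmenter and builds a Doc with no spaces.
doc = nlp.make_doc('ตำรวจจับกุมผู้ต้องหา')
print([token.text for token in doc])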

View File

@ -0,0 +1,62 @@
# encoding: utf8
from __future__ import unicode_literals
# data from https://github.com/wannaphongcom/pythainlp/blob/dev/pythainlp/corpus/stopwords-th.txt
# stop words as whitespace-separated list
STOP_WORDS = set("""
นอกจาก าให ทาง งน วง จาก จะ ความ คร คง ของ
ขอ ระหวาง รวม มาก มา พรอม พบ าน ผล บาง เปดเผย เป เนองจาก เดยวก เดยว เช เฉพาะ เข
อง างๆ าง ตาม งแต าน วย อาจ ออก อยาง อะไร อย อยาก หาก หลาย หลงจาก แต เอง เห
เลย เร เรา เม เพ เพราะ เปนการ เป หล หร หน วน าหร ลง วม ราย ขณะ อน การ
กว กลาว ไว ไป ได ให ใน โดย แห แล และ แรก แบบ เขา เคย ไม อยาก เก เกนๆ เกยวก เกยวก
เกยวของ เกยวเนอง เกยวๆ เกอบ เกอบจะ เกอบๆ แก แก แกไข ใกล ใกล ไกล ไกลๆ ขณะเดยวก ขณะใด ขณะใดๆ ขณะท ขณะน ขณะน ขณะหน ขวาง
ขวางๆ ใคร ใคร ใครจะ ใครๆ าย ายๆ ไง จง จด จน จนกระท จนกว จนขณะน จนตลอด จนถ จนท จนบดน จนเม จนแม จนแม
จรด จรดก จร จรงจ จรงๆ จรงๆจงๆ จวน จวนจะ จวนเจยน จวบ งก งก งก งกนและก งไดแก งๆ วย วยก วยเชนก วยท วยประการฉะน
วยเพราะ วยว วยเหต วยเหต วยเหต วยเหตเพราะ วยเหต วยเหมอนก งกลาว งก งก งกบว งกบว งเก
งเก งเคย ใดๆ ได ไดแก ไดแต ได ไดมา ได ตน ตนเอง ตนฯ ตรง ตรงๆ ตลอด ตลอดกาล ตลอดกาลนาน ตลอดจน ตลอดถ ตลอดท
ตลอดท ตลอดทวถ ตลอดทวท ตลอดป ตลอดไป ตลอดมา ตลอดระยะเวลา ตลอดว ตลอดเวลา ตลอดศก อก งแก งจะ งบดน งบดน
งเม งเมอใด งเมอไร งแม งแมจะ งแม งอยางไร อว กตอง กๆ เถอะ เถ ทรง ทว งคน งต งท งท งน งนนดวย งนนเพราะ
นอก นอกจากท นอกจากน นอกจากน นอกจากว นอกน นอกเหน นอกเหนอจาก อย อยกว อยๆ นะ กๆ นไง นเป นแหละ
นเอง นๆ บจากน บจากน บตงแต บแต บแต บแต เปนต เปนตนไป เปนตนมา เปนแต เปนแตเพยง เปนท เปนท เปนท เปนเพราะ
เปนเพราะว เปนเพยง เปนเพยงว เปนเพ เปนอ เปนอนมาก เปนอนว เปนอนๆ เปนอาท เปนๆ เปลยน เปลยนแปลง เป เปดเผย ไป าน านๆ
ดๆ เพยงเพ เพยงไร เพยงไหน เพอท เพอทจะ เพอว เพอให ภาค ภาคฯ ภาย ภายใต ภายนอก ภายใน ภายภาค ภายภาคหน ภายหน ภายหล
มอง มองว กจะ นๆ ยนะ ยน ยเน ยล นนาน นยง นย นยาว เยอะ เยอะแยะ เยอะๆ แยะ แยะๆ รวด รวดเร วม รวมก วมก
รวมดวย วมดวย รวมถ รวมท วมม รวมๆ ระยะ ระยะๆ ระหวาง บรอง อว นกาลนาน บเนอง ดๆ งกว งส งส งๆ เสมอนก
เสมอนว เสร เสรจก เสรจแล เสรจสมบรณ เสรจส เส เสยกอน เสยจน เสยจนกระท เสยจนถ เสยดวย เสยน เสยนนเอง เสยน เสยนกระไร เสยย
เสยยงน เสยแล ใหญ ให ใหแด ใหไป ใหม ใหมา ใหม ไหน ไหนๆ อด อน อยาง อยางเช อยางด อยางเดยว อยางใด อยางท อยางนอย อยางน
อยางน อยางโน แค จะ ได อเม ตาม ตามแต ตามท แลวแต กระท กระทำ กระน กระผม กล กลาวค กล กลมกอน
กลมๆ กวาง กวางขวาง กวางๆ อนหน อนหนาน อนๆ นดกว นดไหม นเถอะ นนะ นและก นไหม นเอง กำล กำลงจะ กำหนด เก
เก เกยวของ แก แกไข ใกล ใกล าง างเคยง างต างบน างลาง างๆ ขาด าพเจ าฯ เขาใจ เขยน คงจะ คงอย ครบ ครบคร ครบถวน
ครงกระน ครงกอน ครงครา ครงคราว ครงใด ครงท ครงน ครงน ครงละ ครงหน ครงหล ครงหลงส ครงไหน ครงๆ คร คร ครา คราใด คราท คราน คราน คราหน
คราไหน คราว คราวกอน คราวใด คราวท คราวน คราวน คราวโน คราวละ คราวหน คราวหน คราวหล คราวไหน คราวๆ คลาย คลายก คลายกนก
คลายก คลายกบว คลายว ควร อน อนขาง อนขางจะ อยไปทาง อนมาทาง อย อยๆ คะ คำ ดว ณๆ
เคยๆ แค แคจะ แค แค แคเพยง แค แคไหน ใคร ใครจะ าย ายๆ จนกว จนแม จนแม งๆ จวบก จวบจน จะได ดการ ดงาน ดแจง
ดต ดทำ ดหา ดให จากน จากน จากนไป จำ จำเป จำพวก งจะ งเป ฉะน ฉะน เฉกเช เฉย เฉยๆ ไฉน วงกอน
วงตอไป วงถดไป วงทาย วงท วงน วงน วงระหวาง วงแรก วงหน วงหล วงๆ วย านาน ชาว าๆ เชนกอน เชนก เชนเคย
เชนด เชนดงกอน เชนดงเก เชนดงท เชนดงว เชนเดยวก เชนเดยวก เชนใด เชนท เชนทเคย เชนท เชนน เชนนนเอง เชนน เชนเม เชนไร เช
เชอถ เชอม เชอว ใช ใชไหม ใช ซะ ซะกอน ซะจน ซะจนกระท ซะจนถ งไดแก วยก วยเชนก วยท วยเพราะ วยว วยเหต วยเหต
วยเหต วยเหตเพราะ วยเหต วยเหมอนก งกลาว งกบว งกบว งเก งเก งเคย างก างหาก ตามดวย ตามแต ตามท
ตามๆ เตมไปดวย เตมไปหมด เตมๆ แต แตอน แตจะ แตเด แตอง แต แตทว แต แต แตเพยง แตเม แตไร แตละ แต แตไหน แตอยางใด โต
โตๆ ใต าจะ าหาก งแก งแม งแมจะ งแม งอยางไร อว กตอง ทว งนนดวย งปวง งเป งมวล งส งหมด งหลาย งๆ
นใดน นท นทนใด ทำไม ทำไร ทำให ทำๆ จร เดยว ใด ใด ได เถอะ แท แทจร ไร ละ ละ
แล แหงน ไหน กคน กคร กครา กคราว กช กต กทาง กท กท กเม กว กวนน กส กหน กแห กอยาง
กอ กๆ เท เทาก เทาก เทาใด เทาท เทาน เทาน เทาไร เทาไหร แท แทจร เธอ นอกจากว อย อยกว อยๆ นไว บแต นาง
นางสาว าจะ นาน นานๆ นาย นำ นำพา นำมา ดหนอย ดๆ ไง นา แน แหละ แหล เอง เอง เน เน
เนยเอง ในชวง ในท ในเม ในระหวาง บน บอก บอกแล บอกว อย อยกว อยคร อยๆ ดดล ดเดยวน ดน ดน าง บางกว
บางขณะ บางคร บางครา บางคราว บางท บางท บางแห บางๆ ปฏ ประกอบ ประการ ประการฉะน ประการใด ประการหน ประมาณ ประสบ ปร
ปรากฏ ปรากฏว จจ เปนดวย เปนด เปนต เปนแต เปนเพ เปนอ เปนอนมาก เปนอาท านๆ ใด เผ เผอจะ เผอท เผอว าย
ายใด พบว พยายาม พรอมก พรอมก พรอมดวย พรอมท พรอมท พรอมเพยง พวก พวกก พวกก พวกแก พวกเขา พวกค พวกฉ พวกทาน
พวกท พวกเธอ พวกน พวกน พวกน พวกโน พวกม พวกม พอ พอก พอควร พอจะ พอด พอต พอท พอท พอเพยง พอแล พอสม พอสมควร
พอเหมาะ พอๆ พา นๆ เพราะฉะน เพราะว เพ เพงจะ เพ เพมเต เพยง เพยงแค เพยงใด เพยงแต เพยงพอ เพยงเพราะ
เพอว เพอให ภายใต มองว มากกว มากมาย ฉะน ใช ได แต งเน งหมาย เมอกอน เมอคร เมอครงกอน
เมอคราวกอน เมอคราวท เมอคราว เมอค เมอเช เมอใด เมอน เมอน เมอเย เมอไร เมอวนวาน เมอวาน เมอไหร แม แมกระท แมแต แมนว แม
ไมอย ไมอยจะ ไมอยเป ไมใช ไมเปนไร ไม ยก ยกให ยอม ยอมร อม อย งคง งง งง งโง งไง งจะ งแต ยาก
ยาว ยาวนาน งกว งข งขนไป งจน งจะ งน งเม งแล งใหญ วมก รวมดวย วมดวย อว เร เรวๆ เราๆ เรยก เรยบ เรอย
เรอยๆ ไร วน วนจน วนแต ละ าส เล เลกนอย เลกๆ เลาว แลวก แลวแต แลวเสร นใด นน นน นไหน สบาย สม สมยกอน
สมยน สมยน สมยโน วนเก วนดอย วนด วนใด วนท วนนอย วนน วนมาก วนใหญ นๆ สามารถ สำค
งใด งน งน งไหน เสรจแล เสยดวย เสยแล แสดง แสดงว หน หนอ หนอย หนอย หมด หมดก หมดส หรอไง หรอเปล หรอไม หรอย
หรอไร หากแม หากแม หากแมนว หากว หาความ หาใช หาร เหต เหตผล เหต เหต เหตไร เหนแก เหนควร เหนจะ เหนว เหล เหลอเก เหล
เหลาน เหลาน แหงใด แหงน แหงน แหงโน แหงไหน แหละ ใหแก ใหญ ใหญโต อยางเช อยางด อยางเดยว อยางใด อยางท อยางนอย อยางน อยางน
อยางโน อยางมาก อยางย อยางไร อยางไรก อยางไรกได อยางไรเส อยางละ อยางหน อยางไหน อยางๆ นจะ นใด นไดแก นท
นทจร นทจะ นเนองมาจาก นละ นไหน นๆ อาจจะ อาจเป อาจเปนดวย นๆ เอ เอา ฯล ฯลฯ
""".split())

81
spacy/lang/th/tag_map.py Normal file
View File

@ -0,0 +1,81 @@
# encoding: utf8
# data from Korakot Chaovavanich (https://www.facebook.com/photo.php?fbid=390564854695031&set=p.390564854695031&type=3&permPage=1&ifg=1)
from __future__ import unicode_literals
from ...symbols import *
TAG_MAP = {
#NOUN
"NOUN": {POS: NOUN},
"NCMN": {POS: NOUN},
"NTTL": {POS: NOUN},
"CNIT": {POS: NOUN},
"CLTV": {POS: NOUN},
"CMTR": {POS: NOUN},
"CFQC": {POS: NOUN},
"CVBL": {POS: NOUN},
#PRON
"PRON": {POS: PRON},
"NPRP": {POS: PRON},
# ADJ
"ADJ": {POS: ADJ},
"NONM": {POS: ADJ},
"VATT": {POS: ADJ},
"DONM": {POS: ADJ},
# ADV
"ADV": {POS: ADV},
"ADVN": {POS: ADV},
"ADVI": {POS: ADV},
"ADVP": {POS: ADV},
"ADVS": {POS: ADV},
# INTJ
"INT": {POS: INTJ},
# PROPN
"PROPN": {POS: PROPN},
"PPRS": {POS: PROPN},
"PDMN": {POS: PROPN},
"PNTR": {POS: PROPN},
# DET
"DET": {POS: DET},
"DDAN": {POS: DET},
"DDAC": {POS: DET},
"DDBQ": {POS: DET},
"DDAQ": {POS: DET},
"DIAC": {POS: DET},
"DIBQ": {POS: DET},
"DIAQ": {POS: DET},
"DCNM": {POS: DET},
# NUM
"NUM": {POS: NUM},
"NCNM": {POS: NUM},
"NLBL": {POS: NUM},
"DCNM": {POS: NUM},
# AUX
"AUX": {POS: AUX},
"XVBM": {POS: AUX},
"XVAM": {POS: AUX},
"XVMM": {POS: AUX},
"XVBB": {POS: AUX},
"XVAE": {POS: AUX},
# ADP
"ADP": {POS: ADP},
"RPRE": {POS: ADP},
# CCONJ
"CCONJ": {POS: CCONJ},
"JCRG": {POS: CCONJ},
# SCONJ
"SCONJ": {POS: SCONJ},
"PREL": {POS: SCONJ},
"JSBR": {POS: SCONJ},
"JCMP": {POS: SCONJ},
# PART
"PART": {POS: PART},
"FIXN": {POS: PART},
"FIXV": {POS: PART},
"EAFF": {POS: PART},
"AITT": {POS: PART},
"NEG": {POS: PART},
# PUNCT
"PUNCT": {POS: PUNCT},
"PUNC": {POS: PUNCT}
}

View File

@ -0,0 +1,43 @@
# encoding: utf8
from __future__ import unicode_literals
from ...symbols import *
TOKENIZER_EXCEPTIONS = {
"ม.ค.": [
{ORTH: "ม.ค.", LEMMA: "มกราคม"}
],
"ก.พ.": [
{ORTH: "ก.พ.", LEMMA: "กุมภาพันธ์"}
],
"มี.ค.": [
{ORTH: "มี.ค.", LEMMA: "มีนาคม"}
],
"เม.ย.": [
{ORTH: "เม.ย.", LEMMA: "เมษายน"}
],
"พ.ค.": [
{ORTH: "พ.ค.", LEMMA: "พฤษภาคม"}
],
"มิ.ย.": [
{ORTH: "มิ.ย.", LEMMA: "มิถุนายน"}
],
"ก.ค.": [
{ORTH: "ก.ค.", LEMMA: "กรกฎาคม"}
],
"ส.ค.": [
{ORTH: "ส.ค.", LEMMA: "สิงหาคม"}
],
"ก.ย.": [
{ORTH: "ก.ย.", LEMMA: "กันยายน"}
],
"ต.ค.": [
{ORTH: "ต.ค.", LEMMA: "ตุลาคม"}
],
"พ.ย.": [
{ORTH: "พ.ย.", LEMMA: "พฤศจิกายน"}
],
"ธ.ค.": [
{ORTH: "ธ.ค.", LEMMA: "ธันวาคม"}
]
}

View File

@ -14,8 +14,8 @@ class Chinese(Language):
except ImportError:
raise ImportError("The Chinese tokenizer requires the Jieba library: "
"https://github.com/fxsjy/jieba")
words = list(jieba.cut(text, cut_all=True))
words=[x for x in words if x]
words = list(jieba.cut(text, cut_all=False))
words = [x for x in words if x]
return Doc(self.vocab, words=words, spaces=[False]*len(words))
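
For context on the cut_all switch, the sketch below (an editor's example, assuming the jieba package is installed) contrasts the two modes: full mode returns overlapping candidate segments, which is why spaCy now uses the default accurate mode.

import jieba

text = '我来到北京清华大学'
print(list(jieba.cut(text, cut_all=True)))   # full mode: overlapping fragments
print(list(jieba.cut(text, cut_all=False)))  # accurate mode: one non-overlapping segmentation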

View File

@ -34,6 +34,7 @@ from .lang.tag_map import TAG_MAP
from .lang.lex_attrs import LEX_ATTRS
from . import util
from .scorer import Scorer
from ._ml import link_vectors_to_models
class BaseDefaults(object):
@ -278,8 +279,7 @@ class Language(object):
def make_doc(self, text):
return self.tokenizer(text)
def update(self, docs, golds, drop=0., sgd=None, losses=None,
update_shared=False):
def update(self, docs, golds, drop=0., sgd=None, losses=None):
"""Update the models in the pipeline.
docs (iterable): A batch of `Doc` objects.
@ -303,32 +303,17 @@ class Language(object):
if self._optimizer is None:
self._optimizer = Adam(Model.ops, 0.001)
sgd = self._optimizer
tok2vec = self.pipeline[0]
feats = tok2vec.doc2feats(docs)
grads = {}
def get_grads(W, dW, key=None):
grads[key] = (W, dW)
pipes = list(self.pipeline[1:])
pipes = list(self.pipeline)
random.shuffle(pipes)
tokvecses, bp_tokvecses = tok2vec.model.begin_update(feats, drop=drop)
all_d_tokvecses = [tok2vec.model.ops.allocate(tv.shape) for tv in tokvecses]
for proc in pipes:
if not hasattr(proc, 'update'):
continue
d_tokvecses = proc.update((docs, tokvecses), golds,
drop=drop, sgd=get_grads, losses=losses)
if update_shared and d_tokvecses is not None:
for i, d_tv in enumerate(d_tokvecses):
all_d_tokvecses[i] += d_tv
if update_shared and bp_tokvecses is not None:
bp_tokvecses(all_d_tokvecses, sgd=sgd)
proc.update(docs, golds, drop=drop, sgd=get_grads, losses=losses)
for key, (W, dW) in grads.items():
sgd(W, dW, key=key)
# Clear the tensor variable, to free GPU memory.
# If we don't do this, the memory leak gets pretty
# bad, because we may be holding part of a batch.
for doc in docs:
doc.tensor = None
def preprocess_gold(self, docs_golds):
"""Can be called before training to pre-process gold data. By default,
@ -343,36 +328,49 @@ class Language(object):
for doc, gold in docs_golds:
yield doc, gold
def begin_training(self, get_gold_tuples, **cfg):
def resume_training(self, **cfg):
if cfg.get('device', -1) >= 0:
device = util.use_gpu(cfg['device'])
if self.vocab.vectors.data.shape[1] >= 1:
self.vocab.vectors.data = Model.ops.asarray(
self.vocab.vectors.data)
else:
device = None
learn_rate = util.env_opt('learn_rate', 0.001)
beta1 = util.env_opt('optimizer_B1', 0.9)
beta2 = util.env_opt('optimizer_B2', 0.999)
eps = util.env_opt('optimizer_eps', 1e-08)
L2 = util.env_opt('L2_penalty', 1e-6)
max_grad_norm = util.env_opt('grad_norm_clip', 1.)
self._optimizer = Adam(Model.ops, learn_rate, L2=L2, beta1=beta1,
beta2=beta2, eps=eps)
self._optimizer.max_grad_norm = max_grad_norm
self._optimizer.device = device
return self._optimizer
def begin_training(self, get_gold_tuples=None, **cfg):
"""Allocate models, pre-process training data and acquire a trainer and
optimizer. Used as a contextmanager.
gold_tuples (iterable): Gold-standard training data.
get_gold_tuples (function): Function returning gold data
**cfg: Config parameters.
YIELDS (tuple): A trainer and an optimizer.
EXAMPLE:
>>> with nlp.begin_training(gold, use_gpu=True) as (trainer, optimizer):
>>> for epoch in trainer.epochs(gold):
>>> for docs, golds in epoch:
>>> state = nlp.update(docs, golds, sgd=optimizer)
returns: An optimizer
"""
if self.parser:
self.pipeline.append(NeuralLabeller(self.vocab))
# Populate vocab
for _, annots_brackets in get_gold_tuples():
for annots, _ in annots_brackets:
for word in annots[1]:
_ = self.vocab[word]
if get_gold_tuples is not None:
for _, annots_brackets in get_gold_tuples():
for annots, _ in annots_brackets:
for word in annots[1]:
_ = self.vocab[word]
contexts = []
if cfg.get('device', -1) >= 0:
import cupy.cuda.device
device = cupy.cuda.device.Device(cfg['device'])
device.use()
Model.ops = CupyOps()
Model.Ops = CupyOps
device = util.use_gpu(cfg['device'])
if self.vocab.vectors.data.shape[1] >= 1:
self.vocab.vectors.data = Model.ops.asarray(
self.vocab.vectors.data)
else:
device = None
link_vectors_to_models(self.vocab)
for proc in self.pipeline:
if hasattr(proc, 'begin_training'):
context = proc.begin_training(get_gold_tuples(),
@ -390,7 +388,7 @@ class Language(object):
self._optimizer.device = device
return self._optimizer
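
With begin_training now returning an optimizer (and update no longer taking pre-computed tensors), a training loop looks roughly like the sketch below. This is an editor's sketch, not part of the commit: nlp is assumed to be an already-constructed Language, get_gold_tuples a placeholder function returning data in spaCy's gold-tuples format, and train_examples a placeholder list of (Doc, GoldParse) pairs.

import random
import cytoolz

optimizer = nlp.begin_training(get_gold_tuples)   # now simply returns an Adam optimizer
losses = {}
for epoch in range(10):
    random.shuffle(train_examples)                # placeholder: list of (Doc, GoldParse) pairs
    for batch in cytoolz.partition_all(32, train_examples):
        docs, golds = zip(*batch)
        nlp.update(docs, golds, drop=0.2, sgd=optimizer, losses=losses)
    print(epoch, losses)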
def evaluate(self, docs_golds):
def evaluate(self, docs_golds, verbose=False):
scorer = Scorer()
docs, golds = zip(*docs_golds)
docs = list(docs)
@ -403,8 +401,9 @@ class Language(object):
docs = list(pipe.pipe(docs))
assert len(docs) == len(golds)
for doc, gold in zip(docs, golds):
scorer.score(doc, gold)
doc.tensor = None
if verbose:
print(doc)
scorer.score(doc, gold, verbose=verbose)
return scorer
@contextmanager
@ -493,7 +492,6 @@ class Language(object):
"""
path = util.ensure_path(path)
serializers = OrderedDict((
('vocab', lambda p: self.vocab.to_disk(p)),
('tokenizer', lambda p: self.tokenizer.to_disk(p, vocab=False)),
('meta.json', lambda p: p.open('w').write(json_dumps(self.meta)))
))
@ -505,6 +503,7 @@ class Language(object):
if not hasattr(proc, 'to_disk'):
continue
serializers[proc.name] = lambda p, proc=proc: proc.to_disk(p, vocab=False)
serializers['vocab'] = lambda p: self.vocab.to_disk(p)
util.to_disk(path, serializers, {p: False for p in disable})
def from_disk(self, path, disable=tuple()):

View File

@ -38,7 +38,8 @@ class Lemmatizer(object):
avoid lemmatization entirely.
"""
morphology = {} if morphology is None else morphology
others = [key for key in morphology if key not in (POS, 'number', 'pos', 'verbform')]
others = [key for key in morphology
if key not in (POS, 'Number', 'POS', 'VerbForm', 'Tense')]
true_morph_key = morphology.get('morph', 0)
if univ_pos == 'noun' and morphology.get('Number') == 'sing':
return True
@ -47,7 +48,9 @@ class Lemmatizer(object):
# This maps 'VBP' to base form -- probably just need 'IS_BASE'
# morphology
elif univ_pos == 'verb' and (morphology.get('VerbForm') == 'fin' and \
morphology.get('Tense') == 'pres'):
morphology.get('Tense') == 'pres' and \
morphology.get('Number') is None and \
not others):
return True
elif univ_pos == 'adj' and morphology.get('Degree') == 'pos':
return True
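
The practical effect of the tightened check is easiest to see on a present-tense verb: once Number (or any feature outside the whitelist) is present, the form is no longer assumed to be the lemma. A hedged sketch, assuming the surrounding method is Lemmatizer.is_base_form and that Lemmatizer() can be constructed with its default arguments:

from spacy.lemmatizer import Lemmatizer

lemmatizer = Lemmatizer()
# "runs": finite present tense, but Number=sing -> no longer treated as a base form
print(lemmatizer.is_base_form('verb', {'VerbForm': 'fin', 'Tense': 'pres', 'Number': 'sing'}))
# "run": finite present tense with no extra features -> still a base form
print(lemmatizer.is_base_form('verb', {'VerbForm': 'fin', 'Tense': 'pres'}))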

View File

@ -421,47 +421,69 @@ cdef class PhraseMatcher:
cdef int max_length
cdef attr_t* _phrase_key
def __init__(self, Vocab vocab, phrases, max_length=10):
cdef public object _callbacks
cdef public object _patterns
def __init__(self, Vocab vocab, max_length=10):
self.mem = Pool()
self._phrase_key = <attr_t*>self.mem.alloc(max_length, sizeof(attr_t))
self.max_length = max_length
self.vocab = vocab
self.matcher = Matcher(self.vocab, {})
self.matcher = Matcher(self.vocab)
self.phrase_ids = PreshMap()
for phrase in phrases:
if len(phrase) < max_length:
self.add(phrase)
abstract_patterns = []
for length in range(1, max_length):
abstract_patterns.append([{tag: True} for tag in get_bilou(length)])
self.matcher.add('Candidate', 'MWE', {}, abstract_patterns, acceptor=self.accept_match)
self.matcher.add('Candidate', None, *abstract_patterns)
self._callbacks = {}
def add(self, Doc tokens):
cdef int length = tokens.length
assert length < self.max_length
tags = get_bilou(length)
assert len(tags) == length, length
def __len__(self):
raise NotImplementedError
def __contains__(self, key):
raise NotImplementedError
def __reduce__(self):
return (self.__class__, (self.vocab,), None, None)
def add(self, key, on_match, *docs):
cdef Doc doc
for doc in docs:
if len(doc) >= self.max_length:
msg = (
"Pattern length (%d) >= phrase_matcher.max_length (%d). "
"Length can be set on initialization, up to 10."
)
raise ValueError(msg % (len(doc), self.max_length))
cdef hash_t ent_id = self.matcher._normalize_key(key)
self._callbacks[ent_id] = on_match
cdef int length
cdef int i
for i in range(self.max_length):
self._phrase_key[i] = 0
for i, tag in enumerate(tags):
lexeme = self.vocab[tokens.c[i].lex.orth]
lexeme.set_flag(tag, True)
self._phrase_key[i] = lexeme.orth
cdef hash_t key = hash64(self._phrase_key, self.max_length * sizeof(attr_t), 0)
self.phrase_ids[key] = True
cdef hash_t phrase_hash
for doc in docs:
length = doc.length
tags = get_bilou(length)
for i in range(self.max_length):
self._phrase_key[i] = 0
for i, tag in enumerate(tags):
lexeme = self.vocab[doc.c[i].lex.orth]
lexeme.set_flag(tag, True)
self._phrase_key[i] = lexeme.orth
phrase_hash = hash64(self._phrase_key,
self.max_length * sizeof(attr_t), 0)
self.phrase_ids.set(phrase_hash, <void*>ent_id)
def __call__(self, Doc doc):
matches = []
for ent_id, label, start, end in self.matcher(doc):
cand = doc[start : end]
start = cand[0].idx
end = cand[-1].idx + len(cand[-1])
matches.append((start, end, cand.root.tag_, cand.text, 'MWE'))
for match in matches:
doc.merge(*match)
for _, start, end in self.matcher(doc):
ent_id = self.accept_match(doc, start, end)
if ent_id is not None:
matches.append((ent_id, start, end))
for i, (ent_id, start, end) in enumerate(matches):
on_match = self._callbacks.get(ent_id)
if on_match is not None:
on_match(self, doc, i, matches)
return matches
def pipe(self, stream, batch_size=1000, n_threads=2):
@ -469,7 +491,7 @@ cdef class PhraseMatcher:
self(doc)
yield doc
def accept_match(self, Doc doc, attr_t ent_id, attr_t label, int start, int end):
def accept_match(self, Doc doc, int start, int end):
assert (end - start) < self.max_length
cdef int i, j
for i in range(self.max_length):
@ -477,7 +499,8 @@ cdef class PhraseMatcher:
for i, j in enumerate(range(start, end)):
self._phrase_key[i] = doc.c[j].lex.orth
cdef hash_t key = hash64(self._phrase_key, self.max_length * sizeof(attr_t), 0)
if self.phrase_ids.get(key):
return (ent_id, label, start, end)
ent_id = <hash_t>self.phrase_ids.get(key)
if ent_id == 0:
return None
else:
return False
return ent_id
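
A hedged usage sketch of the reworked API (an editor's example, not part of the commit): add now takes a key, an optional on_match callback and one or more Doc patterns, and calling the matcher returns (match_id, start, end) tuples instead of merging spans in place. English() is used here only to provide a vocab and tokenizer for illustration.

from spacy.lang.en import English
from spacy.matcher import PhraseMatcher

nlp = English()
matcher = PhraseMatcher(nlp.vocab)
patterns = [nlp.make_doc(text) for text in ['machine learning', 'deep learning']]
matcher.add('TECH', None, *patterns)        # key, on_match callback, *docs

doc = nlp.make_doc('We use machine learning and a little deep learning.')
for match_id, start, end in matcher(doc):
    print(doc[start:end].text)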

View File

@ -146,6 +146,8 @@ cdef class Morphology:
self.add_special_case(tag_str, form_str, attrs)
def lemmatize(self, const univ_pos_t univ_pos, attr_t orth, morphology):
if orth not in self.strings:
return orth
cdef unicode py_string = self.strings[orth]
if self.lemmatizer is None:
return self.strings.add(py_string.lower())

View File

@ -4,7 +4,6 @@
from __future__ import unicode_literals
from thinc.api import chain, layerize, with_getitem
from thinc.neural import Model, Softmax
import numpy
cimport numpy as np
import cytoolz
@ -14,17 +13,18 @@ import ujson
import msgpack
from thinc.api import add, layerize, chain, clone, concatenate, with_flatten
from thinc.neural import Model, Maxout, Softmax, Affine
from thinc.neural._classes.hash_embed import HashEmbed
from thinc.v2v import Model, Maxout, Softmax, Affine, ReLu, SELU
from thinc.i2v import HashEmbed
from thinc.t2v import Pooling, max_pool, mean_pool, sum_pool
from thinc.t2t import ExtractWindow, ParametricAttention
from thinc.misc import Residual
from thinc.misc import BatchNorm as BN
from thinc.misc import LayerNorm as LN
from thinc.neural.util import to_categorical
from thinc.neural.pooling import Pooling, max_pool, mean_pool
from thinc.neural._classes.difference import Siamese, CauchySimilarity
from thinc.neural._classes.convolution import ExtractWindow
from thinc.neural._classes.resnet import Residual
from thinc.neural._classes.batchnorm import BatchNorm as BN
from .tokens.doc cimport Doc
from .syntax.parser cimport Parser as LinearParser
from .syntax.nn_parser cimport Parser as NeuralParser
@ -41,13 +41,14 @@ from .syntax import nonproj
from .compat import json_dumps
from .attrs import ID, LOWER, PREFIX, SUFFIX, SHAPE, TAG, DEP, POS
from ._ml import rebatch, Tok2Vec, flatten, get_col, doc2feats
from ._ml import rebatch, Tok2Vec, flatten
from ._ml import build_text_classifier, build_tagger_model
from ._ml import link_vectors_to_models
from .parts_of_speech import X
class SentenceSegmenter(object):
'''A simple spaCy hook, to allow custom sentence boundary detection logic
"""A simple spaCy hook, to allow custom sentence boundary detection logic
(that doesn't require the dependency parse).
To change the sentence boundary detection strategy, pass a generator
@ -56,7 +57,7 @@ class SentenceSegmenter(object):
Sentence detection strategies should be generators that take `Doc` objects
and yield `Span` objects for each sentence.
'''
"""
name = 'sbd'
def __init__(self, vocab, strategy=None):
@ -88,17 +89,30 @@ class BaseThincComponent(object):
@classmethod
def Model(cls, *shape, **kwargs):
"""Initialize a model for the pipe."""
raise NotImplementedError
def __init__(self, vocab, model=True, **cfg):
"""Create a new pipe instance."""
raise NotImplementedError
def __call__(self, doc):
"""Apply the pipe to one document. The document is
modified in-place, and returned.
Both __call__ and pipe should delegate to the `predict()`
and `set_annotations()` methods.
"""
scores = self.predict([doc])
self.set_annotations([doc], scores)
return doc
def pipe(self, stream, batch_size=128, n_threads=-1):
"""Apply the pipe to a stream of documents.
Both __call__ and pipe should delegate to the `predict()`
and `set_annotations()` methods.
"""
for docs in cytoolz.partition_all(batch_size, stream):
docs = list(docs)
scores = self.predict(docs)
@ -106,27 +120,43 @@ class BaseThincComponent(object):
yield from docs
def predict(self, docs):
"""Apply the pipeline's model to a batch of docs, without
modifying them.
"""
raise NotImplementedError
def set_annotations(self, docs, scores):
"""Modify a batch of documents, using pre-computed scores."""
raise NotImplementedError
def update(self, docs_tensors, golds, state=None, drop=0., sgd=None, losses=None):
def update(self, docs, golds, drop=0., sgd=None, losses=None):
"""Learn from a batch of documents and gold-standard information,
updating the pipe's model.
Delegates to predict() and get_loss().
"""
raise NotImplementedError
def get_loss(self, docs, golds, scores):
"""Find the loss and gradient of loss for the batch of
documents and their predicted scores."""
raise NotImplementedError
def begin_training(self, gold_tuples=tuple(), pipeline=None):
token_vector_width = pipeline[0].model.nO
"""Initialize the pipe for training, using data exampes if available.
If no model has been initialized yet, the model is added."""
if self.model is True:
self.model = self.Model(1, token_vector_width)
self.model = self.Model(**self.cfg)
link_vectors_to_models(self.vocab)
def use_params(self, params):
"""Modify the pipe's model, to use the given parameter values.
"""
with self.model.use_params(params):
yield
def to_bytes(self, **exclude):
"""Serialize the pipe to a bytestring."""
serialize = OrderedDict((
('cfg', lambda: json_dumps(self.cfg)),
('model', lambda: self.model.to_bytes()),
@ -135,37 +165,42 @@ class BaseThincComponent(object):
return util.to_bytes(serialize, exclude)
def from_bytes(self, bytes_data, **exclude):
"""Load the pipe from a bytestring."""
def load_model(b):
if self.model is True:
self.cfg['pretrained_dims'] = self.vocab.vectors_length
self.model = self.Model(**self.cfg)
self.model.from_bytes(b)
deserialize = OrderedDict((
('cfg', lambda b: self.cfg.update(ujson.loads(b))),
('vocab', lambda b: self.vocab.from_bytes(b)),
('model', load_model),
('vocab', lambda b: self.vocab.from_bytes(b))
))
util.from_bytes(bytes_data, deserialize, exclude)
return self
def to_disk(self, path, **exclude):
"""Serialize the pipe to disk."""
serialize = OrderedDict((
('cfg', lambda p: p.open('w').write(json_dumps(self.cfg))),
('vocab', lambda p: self.vocab.to_disk(p)),
('model', lambda p: p.open('wb').write(self.model.to_bytes())),
('vocab', lambda p: self.vocab.to_disk(p))
))
util.to_disk(path, serialize, exclude)
def from_disk(self, path, **exclude):
"""Load the pipe from disk."""
def load_model(p):
if self.model is True:
self.cfg['pretrained_dims'] = self.vocab.vectors_length
self.model = self.Model(**self.cfg)
self.model.from_bytes(p.open('rb').read())
deserialize = OrderedDict((
('cfg', lambda p: self.cfg.update(_load_cfg(p))),
('model', load_model),
('vocab', lambda p: self.vocab.from_disk(p)),
('model', load_model),
))
util.from_disk(path, deserialize, exclude)
return self
@ -193,7 +228,7 @@ class TokenVectorEncoder(BaseThincComponent):
"""
width = util.env_opt('token_vector_width', width)
embed_size = util.env_opt('embed_size', embed_size)
return Tok2Vec(width, embed_size, preprocess=None)
return Tok2Vec(width, embed_size, **cfg)
def __init__(self, vocab, model=True, **cfg):
"""Construct a new statistical model. Weights are not allocated on
@ -210,9 +245,10 @@ class TokenVectorEncoder(BaseThincComponent):
>>> tok2vec.model = tok2vec.Model(128, 5000)
"""
self.vocab = vocab
self.doc2feats = doc2feats()
self.model = model
self.cfg = dict(cfg)
self.cfg['pretrained_dims'] = self.vocab.vectors.data.shape[1]
self.cfg.setdefault('cnn_maxout_pieces', 3)
def __call__(self, doc):
"""Add context-sensitive vectors to a `Doc`, e.g. from a CNN or LSTM
@ -245,8 +281,7 @@ class TokenVectorEncoder(BaseThincComponent):
docs (iterable): A sequence of `Doc` objects.
RETURNS (object): Vector representations for each token in the documents.
"""
feats = self.doc2feats(docs)
tokvecs = self.model(feats)
tokvecs = self.model(docs)
return tokvecs
def set_annotations(self, docs, tokvecses):
@ -270,8 +305,7 @@ class TokenVectorEncoder(BaseThincComponent):
"""
if isinstance(docs, Doc):
docs = [docs]
feats = self.doc2feats(docs)
tokvecs, bp_tokvecs = self.model.begin_update(feats, drop=drop)
tokvecs, bp_tokvecs = self.model.begin_update(docs, drop=drop)
return tokvecs, bp_tokvecs
def get_loss(self, docs, golds, scores):
@ -285,9 +319,10 @@ class TokenVectorEncoder(BaseThincComponent):
gold_tuples (iterable): Gold-standard training data.
pipeline (list): The pipeline the model is part of.
"""
self.doc2feats = doc2feats()
if self.model is True:
self.model = self.Model()
self.cfg['pretrained_dims'] = self.vocab.vectors_length
self.model = self.Model(**self.cfg)
link_vectors_to_models(self.vocab)
class NeuralTagger(BaseThincComponent):
@ -296,29 +331,29 @@ class NeuralTagger(BaseThincComponent):
self.vocab = vocab
self.model = model
self.cfg = dict(cfg)
self.cfg.setdefault('cnn_maxout_pieces', 2)
self.cfg.setdefault('pretrained_dims', self.vocab.vectors.data.shape[1])
def __call__(self, doc):
tags = self.predict(([doc], [doc.tensor]))
tags = self.predict([doc])
self.set_annotations([doc], tags)
return doc
def pipe(self, stream, batch_size=128, n_threads=-1):
for docs in cytoolz.partition_all(batch_size, stream):
docs = list(docs)
tokvecs = [d.tensor for d in docs]
tag_ids = self.predict((docs, tokvecs))
tag_ids = self.predict(docs)
self.set_annotations(docs, tag_ids)
yield from docs
def predict(self, docs_tokvecs):
scores = self.model(docs_tokvecs)
def predict(self, docs):
scores = self.model(docs)
scores = self.model.ops.flatten(scores)
guesses = scores.argmax(axis=1)
if not isinstance(guesses, numpy.ndarray):
guesses = guesses.get()
tokvecs = docs_tokvecs[1]
guesses = self.model.ops.unflatten(guesses,
[tv.shape[0] for tv in tokvecs])
[len(d) for d in docs])
return guesses
def set_annotations(self, docs, batch_tag_ids):
@ -338,20 +373,16 @@ class NeuralTagger(BaseThincComponent):
idx += 1
doc.is_tagged = True
def update(self, docs_tokvecs, golds, drop=0., sgd=None, losses=None):
def update(self, docs, golds, drop=0., sgd=None, losses=None):
if losses is not None and self.name not in losses:
losses[self.name] = 0.
docs, tokvecs = docs_tokvecs
if self.model.nI is None:
self.model.nI = tokvecs[0].shape[1]
tag_scores, bp_tag_scores = self.model.begin_update(docs_tokvecs, drop=drop)
tag_scores, bp_tag_scores = self.model.begin_update(docs, drop=drop)
loss, d_tag_scores = self.get_loss(docs, golds, tag_scores)
bp_tag_scores(d_tag_scores, sgd=sgd)
d_tokvecs = bp_tag_scores(d_tag_scores, sgd=sgd)
if losses is not None:
losses[self.name] += loss
return d_tokvecs
def get_loss(self, docs, golds, scores):
scores = self.model.ops.flatten(scores)
@ -392,13 +423,14 @@ class NeuralTagger(BaseThincComponent):
vocab.morphology = Morphology(vocab.strings, new_tag_map,
vocab.morphology.lemmatizer,
exc=vocab.morphology.exc)
token_vector_width = pipeline[0].model.nO
if self.model is True:
self.model = self.Model(self.vocab.morphology.n_tags, token_vector_width)
self.cfg['pretrained_dims'] = self.vocab.vectors.data.shape[1]
self.model = self.Model(self.vocab.morphology.n_tags, **self.cfg)
link_vectors_to_models(self.vocab)
@classmethod
def Model(cls, n_tags, token_vector_width):
return build_tagger_model(n_tags, token_vector_width)
def Model(cls, n_tags, **cfg):
return build_tagger_model(n_tags, **cfg)
def use_params(self, params):
with self.model.use_params(params):
@ -419,7 +451,7 @@ class NeuralTagger(BaseThincComponent):
if self.model is True:
token_vector_width = util.env_opt('token_vector_width',
self.cfg.get('token_vector_width', 128))
self.model = self.Model(self.vocab.morphology.n_tags, token_vector_width)
self.model = self.Model(self.vocab.morphology.n_tags, **self.cfg)
self.model.from_bytes(b)
def load_tag_map(b):
@ -438,6 +470,7 @@ class NeuralTagger(BaseThincComponent):
return self
def to_disk(self, path, **exclude):
self.cfg['pretrained_dims'] = self.vocab.vectors.data.shape[1]
serialize = OrderedDict((
('vocab', lambda p: self.vocab.to_disk(p)),
('tag_map', lambda p: p.open('wb').write(msgpack.dumps(
@ -452,9 +485,7 @@ class NeuralTagger(BaseThincComponent):
def from_disk(self, path, **exclude):
def load_model(p):
if self.model is True:
token_vector_width = util.env_opt('token_vector_width',
self.cfg.get('token_vector_width', 128))
self.model = self.Model(self.vocab.morphology.n_tags, token_vector_width)
self.model = self.Model(self.vocab.morphology.n_tags, **self.cfg)
self.model.from_bytes(p.open('rb').read())
def load_tag_map(p):
@ -466,10 +497,10 @@ class NeuralTagger(BaseThincComponent):
exc=self.vocab.morphology.exc)
deserialize = OrderedDict((
('cfg', lambda p: self.cfg.update(_load_cfg(p))),
('vocab', lambda p: self.vocab.from_disk(p)),
('tag_map', load_tag_map),
('model', load_model),
('cfg', lambda p: self.cfg.update(_load_cfg(p)))
))
util.from_disk(path, deserialize, exclude)
return self
@ -477,10 +508,28 @@ class NeuralTagger(BaseThincComponent):
class NeuralLabeller(NeuralTagger):
name = 'nn_labeller'
def __init__(self, vocab, model=True, **cfg):
def __init__(self, vocab, model=True, target='dep_tag_offset', **cfg):
self.vocab = vocab
self.model = model
if target == 'dep':
self.make_label = self.make_dep
elif target == 'tag':
self.make_label = self.make_tag
elif target == 'ent':
self.make_label = self.make_ent
elif target == 'dep_tag_offset':
self.make_label = self.make_dep_tag_offset
elif target == 'ent_tag':
self.make_label = self.make_ent_tag
elif hasattr(target, '__call__'):
self.make_label = target
else:
raise ValueError(
"NeuralLabeller target should be function or one of "
"['dep', 'tag', 'ent', 'dep_tag_offset', 'ent_tag']")
self.cfg = dict(cfg)
self.cfg.setdefault('cnn_maxout_pieces', 2)
self.cfg.setdefault('pretrained_dims', self.vocab.vectors.data.shape[1])
@property
def labels(self):
@ -493,41 +542,79 @@ class NeuralLabeller(NeuralTagger):
def set_annotations(self, docs, dep_ids):
pass
def begin_training(self, gold_tuples=tuple(), pipeline=None):
def begin_training(self, gold_tuples=tuple(), pipeline=None, tok2vec=None):
gold_tuples = nonproj.preprocess_training_data(gold_tuples)
for raw_text, annots_brackets in gold_tuples:
for annots, brackets in annots_brackets:
ids, words, tags, heads, deps, ents = annots
for dep in deps:
if dep not in self.labels:
self.labels[dep] = len(self.labels)
token_vector_width = pipeline[0].model.nO
for i in range(len(ids)):
label = self.make_label(i, words, tags, heads, deps, ents)
if label is not None and label not in self.labels:
self.labels[label] = len(self.labels)
print(len(self.labels))
if self.model is True:
self.model = self.Model(len(self.labels), token_vector_width)
token_vector_width = util.env_opt('token_vector_width')
self.model = chain(
tok2vec,
Softmax(len(self.labels), token_vector_width)
)
link_vectors_to_models(self.vocab)
@classmethod
def Model(cls, n_tags, token_vector_width):
return build_tagger_model(n_tags, token_vector_width)
def Model(cls, n_tags, tok2vec=None, **cfg):
return build_tagger_model(n_tags, tok2vec=tok2vec, **cfg)
def get_loss(self, docs, golds, scores):
scores = self.model.ops.flatten(scores)
cdef int idx = 0
correct = numpy.zeros((scores.shape[0],), dtype='i')
guesses = scores.argmax(axis=1)
for gold in golds:
for tag in gold.labels:
if tag is None or tag not in self.labels:
for i in range(len(gold.labels)):
label = self.make_label(i, gold.words, gold.tags, gold.heads,
gold.labels, gold.ents)
if label is None or label not in self.labels:
correct[idx] = guesses[idx]
else:
correct[idx] = self.labels[tag]
correct[idx] = self.labels[label]
idx += 1
correct = self.model.ops.xp.array(correct, dtype='i')
d_scores = scores - to_categorical(correct, nb_classes=scores.shape[1])
d_scores /= d_scores.shape[0]
loss = (d_scores**2).sum()
d_scores = self.model.ops.unflatten(d_scores, [len(d) for d in docs])
return float(loss), d_scores
@staticmethod
def make_dep(i, words, tags, heads, deps, ents):
if deps[i] is None or heads[i] is None:
return None
return deps[i]
@staticmethod
def make_tag(i, words, tags, heads, deps, ents):
return tags[i]
@staticmethod
def make_ent(i, words, tags, heads, deps, ents):
if ents is None:
return None
return ents[i]
@staticmethod
def make_dep_tag_offset(i, words, tags, heads, deps, ents):
if deps[i] is None or heads[i] is None:
return None
offset = heads[i] - i
offset = min(offset, 2)
offset = max(offset, -2)
return '%s-%s:%d' % (deps[i], tags[i], offset)
@staticmethod
def make_ent_tag(i, words, tags, heads, deps, ents):
if ents is None or ents[i] is None:
return None
else:
return '%s-%s' % (tags[i], ents[i])
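
To make the new multi-task label formats concrete, the sketch below (an editor's example with made-up annotations) mirrors make_dep_tag_offset for a toy three-token sentence: the label combines the dependency, the tag and the head offset clipped to [-2, 2].

words = ['The', 'cat', 'sleeps']
tags  = ['DT', 'NN', 'VBZ']
heads = [1, 2, 2]                 # head indices; the root points at itself
deps  = ['det', 'nsubj', 'ROOT']

for i in range(len(words)):
    offset = max(min(heads[i] - i, 2), -2)   # clip the offset to [-2, 2]
    print('%s-%s:%d' % (deps[i], tags[i], offset))
# det-DT:1  nsubj-NN:1  ROOT-VBZ:0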
class SimilarityHook(BaseThincComponent):
"""
@ -555,7 +642,7 @@ class SimilarityHook(BaseThincComponent):
return Siamese(Pooling(max_pool, mean_pool), CauchySimilarity(length))
def __call__(self, doc):
'''Install similarity hook'''
"""Install similarity hook"""
doc.user_hooks['similarity'] = self.predict
return doc
@ -564,15 +651,10 @@ class SimilarityHook(BaseThincComponent):
yield self(doc)
def predict(self, doc1, doc2):
return self.model.predict([(doc1.tensor, doc2.tensor)])
return self.model.predict([(doc1, doc2)])
def update(self, doc1_tensor1_doc2_tensor2, golds, sgd=None, drop=0.):
doc1s, tensor1s, doc2s, tensor2s = doc1_tensor1_doc2_tensor2
sims, bp_sims = self.model.begin_update(zip(tensor1s, tensor2s),
drop=drop)
d_tensor1s, d_tensor2s = bp_sims(golds, sgd=sgd)
return d_tensor1s, d_tensor2s
def update(self, doc1_doc2, golds, sgd=None, drop=0.):
sims, bp_sims = self.model.begin_update(doc1_doc2, drop=drop)
def begin_training(self, _=tuple(), pipeline=None):
"""
@ -583,6 +665,7 @@ class SimilarityHook(BaseThincComponent):
"""
if self.model is True:
self.model = self.Model(pipeline[0].model.nO)
link_vectors_to_models(self.vocab)
class TextCategorizer(BaseThincComponent):
@ -627,15 +710,13 @@ class TextCategorizer(BaseThincComponent):
for j, label in enumerate(self.labels):
doc.cats[label] = float(scores[i, j])
def update(self, docs_tensors, golds, state=None, drop=0., sgd=None, losses=None):
docs, tensors = docs_tensors
def update(self, docs, golds, state=None, drop=0., sgd=None, losses=None):
scores, bp_scores = self.model.begin_update(docs, drop=drop)
loss, d_scores = self.get_loss(docs, golds, scores)
d_tensors = bp_scores(d_scores, sgd=sgd)
bp_scores(d_scores, sgd=sgd)
if losses is not None:
losses.setdefault(self.name, 0.0)
losses[self.name] += loss
return d_tensors
def get_loss(self, docs, golds, scores):
truths = numpy.zeros((len(golds), len(self.labels)), dtype='f')
@ -653,8 +734,10 @@ class TextCategorizer(BaseThincComponent):
else:
token_vector_width = 64
if self.model is True:
self.cfg['pretrained_dims'] = self.vocab.vectors_length
self.model = self.Model(len(self.labels), token_vector_width,
**self.cfg)
link_vectors_to_models(self.vocab)
cdef class EntityRecognizer(LinearParser):
@ -695,6 +778,14 @@ cdef class NeuralDependencyParser(NeuralParser):
name = 'parser'
TransitionSystem = ArcEager
def init_multitask_objectives(self, gold_tuples, pipeline, **cfg):
for target in []:
labeller = NeuralLabeller(self.vocab, target=target)
tok2vec = self.model[0]
labeller.begin_training(gold_tuples, pipeline=pipeline, tok2vec=tok2vec)
pipeline.append(labeller)
self._multitasks.append(labeller)
def __reduce__(self):
return (NeuralDependencyParser, (self.vocab, self.moves, self.model), None, None)
@ -705,13 +796,13 @@ cdef class NeuralEntityRecognizer(NeuralParser):
nr_feature = 6
def predict_confidences(self, docs):
tensors = [d.tensor for d in docs]
samples = []
for i in range(10):
states = self.parse_batch(docs, tensors, drop=0.3)
for state in states:
samples.append(self._get_entities(state))
def init_multitask_objectives(self, gold_tuples, pipeline, **cfg):
for target in []:
labeller = NeuralLabeller(self.vocab, target=target)
tok2vec = self.model[0]
labeller.begin_training(gold_tuples, pipeline=pipeline, tok2vec=tok2vec)
pipeline.append(labeller)
self._multitasks.append(labeller)
def __reduce__(self):
return (NeuralEntityRecognizer, (self.vocab, self.moves, self.model), None, None)

View File

@ -1,4 +1,4 @@
cpdef enum symbol_t:
cdef enum symbol_t:
NIL
IS_ALPHA
IS_ASCII

View File

@ -1,4 +1,6 @@
# coding: utf8
#cython: optimize.unpack_method_calls=False
from __future__ import unicode_literals
IDS = {
@ -458,4 +460,11 @@ IDS = {
"xcomp": xcomp
}
NAMES = [it[0] for it in sorted(IDS.items(), key=lambda it: it[1])]
def sort_nums(x):
return x[1]
NAMES = [it[0] for it in sorted(IDS.items(), key=sort_nums)]
# Unfortunate hack here, to work around problem with long cpdef enum
# (which is generating an enormous amount of C++ in Cython 0.24+)
# We keep the enum cdef, and just make sure the names are available to Python
locals().update(IDS)
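
The intent of the workaround is that the symbol constants stay importable from Python even though the enum is now cdef-only; a quick sanity check (an editor's example):

from spacy.symbols import NOUN, VERB, nsubj

# These are plain integer IDs, injected into the module namespace
# by locals().update(IDS) above.
print(NOUN, VERB, nsubj)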

View File

@ -147,10 +147,10 @@ def get_token_ids(states, int n_tokens):
nr_update = 0
def update_beam(TransitionSystem moves, int nr_feature, int max_steps,
states, tokvecs, golds,
states, golds,
state2vec, vec2scores,
int width, float density,
sgd=None, losses=None, drop=0.):
losses=None, drop=0.):
global nr_update
cdef MaxViolation violn
nr_update += 1

View File

@ -101,9 +101,10 @@ cdef cppclass StateC:
elif n == 6:
if this.B(0) >= 0:
ids[0] = this.B(0)
ids[1] = this.B(0)-1
else:
ids[0] = -1
ids[1] = this.B(0)
ids[1] = -1
ids[2] = this.B(1)
ids[3] = this.E(0)
if ids[3] >= 1:
@ -120,6 +121,8 @@ cdef cppclass StateC:
for i in range(n):
if ids[i] >= 0:
ids[i] += this.offset
else:
ids[i] = -1
int S(int i) nogil const:
if i >= this._s_i:
@ -162,9 +165,9 @@ cdef cppclass StateC:
int E(int i) nogil const:
if this._e_i <= 0 or this._e_i >= this.length:
return 0
return -1
if i < 0 or i >= this._e_i:
return 0
return -1
return this._ents[this._e_i - (i+1)].start
int L(int i, int idx) nogil const:

View File

@ -161,8 +161,7 @@ cdef class BiluoPushDown(TransitionSystem):
cdef Transition lookup_transition(self, object name) except *:
cdef attr_t label
if name == '-' or name == None:
move_str = 'M'
label = 0
return Transition(clas=0, move=MISSING, label=0, score=0)
elif name == '!O':
return Transition(clas=0, move=ISNT, label=0, score=0)
elif '-' in name:
@ -220,6 +219,31 @@ cdef class BiluoPushDown(TransitionSystem):
raise Exception(move)
return t
#def add_action(self, int action, label_name):
# cdef attr_t label_id
# if not isinstance(label_name, (int, long)):
# label_id = self.strings.add(label_name)
# else:
# label_id = label_name
# if action == OUT and label_id != 0:
# return
# if action == MISSING or action == ISNT:
# return
# # Check we're not creating a move we already have, so that this is
# # idempotent
# for trans in self.c[:self.n_moves]:
# if trans.move == action and trans.label == label_id:
# return 0
# if self.n_moves >= self._size:
# self._size *= 2
# self.c = <Transition*>self.mem.realloc(self.c, self._size * sizeof(self.c[0]))
# self.c[self.n_moves] = self.init_transition(self.n_moves, action, label_id)
# assert self.c[self.n_moves].label == label_id
# self.n_moves += 1
# return 1
cdef int initialize_state(self, StateC* st) nogil:
# This is especially necessary when we use limited training data.
for i in range(st.length):

View File

@ -13,6 +13,7 @@ cdef class Parser:
cdef public object model
cdef readonly TransitionSystem moves
cdef readonly object cfg
cdef public object _multitasks
cdef void _parse_step(self, StateC* state,
const float* feat_weights,

View File

@ -7,6 +7,7 @@ from __future__ import unicode_literals, print_function
from collections import Counter, OrderedDict
import ujson
import json
import contextlib
from libc.math cimport exp
@ -37,10 +38,9 @@ from preshed.maps cimport MapStruct
from preshed.maps cimport map_get
from thinc.api import layerize, chain, noop, clone, with_flatten
from thinc.neural import Model, Affine, ReLu, Maxout
from thinc.neural._classes.batchnorm import BatchNorm as BN
from thinc.neural._classes.selu import SELU
from thinc.neural._classes.layernorm import LayerNorm
from thinc.v2v import Model, Maxout, Softmax, Affine, ReLu, SELU
from thinc.misc import LayerNorm
from thinc.neural.ops import NumpyOps, CupyOps
from thinc.neural.util import get_array_module
@ -48,7 +48,8 @@ from .. import util
from ..util import get_async, get_cuda_stream
from .._ml import zero_init, PrecomputableAffine, PrecomputableMaxouts
from .._ml import Tok2Vec, doc2feats, rebatch, fine_tune
from .._ml import Residual, drop_layer
from .._ml import Residual, drop_layer, flatten
from .._ml import link_vectors_to_models
from ..compat import json_dumps
from . import _parse_features
@ -238,14 +239,15 @@ cdef class Parser:
Base class of the DependencyParser and EntityRecognizer.
"""
@classmethod
def Model(cls, nr_class, token_vector_width=128, hidden_width=300, depth=1, **cfg):
def Model(cls, nr_class, token_vector_width=128, hidden_width=200, depth=1, **cfg):
depth = util.env_opt('parser_hidden_depth', depth)
token_vector_width = util.env_opt('token_vector_width', token_vector_width)
hidden_width = util.env_opt('hidden_width', hidden_width)
parser_maxout_pieces = util.env_opt('parser_maxout_pieces', 2)
embed_size = util.env_opt('embed_size', 4000)
tensors = fine_tune(Tok2Vec(token_vector_width, embed_size,
preprocess=doc2feats()))
embed_size = util.env_opt('embed_size', 7000)
tok2vec = Tok2Vec(token_vector_width, embed_size,
pretrained_dims=cfg.get('pretrained_dims', 0))
tok2vec = chain(tok2vec, flatten)
if parser_maxout_pieces == 1:
lower = PrecomputableAffine(hidden_width if depth >= 1 else nr_class,
nF=cls.nr_feature,
@ -262,8 +264,8 @@ cdef class Parser:
upper.is_noop = True
else:
upper = chain(
clone(Maxout(hidden_width), (depth-1)),
zero_init(Affine(nr_class, drop_factor=0.0))
clone(Maxout(hidden_width), depth-1),
zero_init(Affine(nr_class, hidden_width, drop_factor=0.0))
)
upper.is_noop = False
# TODO: This is an unfortunate hack atm!
@ -277,7 +279,7 @@ cdef class Parser:
'hidden_width': hidden_width,
'maxout_pieces': parser_maxout_pieces
}
return (tensors, lower, upper), cfg
return (tok2vec, lower, upper), cfg
def __init__(self, Vocab vocab, moves=True, model=True, **cfg):
"""
@ -307,12 +309,16 @@ cdef class Parser:
cfg['beam_width'] = util.env_opt('beam_width', 1)
if 'beam_density' not in cfg:
cfg['beam_density'] = util.env_opt('beam_density', 0.0)
if 'pretrained_dims' not in cfg:
cfg['pretrained_dims'] = self.vocab.vectors.data.shape[1]
cfg.setdefault('cnn_maxout_pieces', 3)
self.cfg = cfg
if 'actions' in self.cfg:
for action, labels in self.cfg.get('actions', {}).items():
for label in labels:
self.moves.add_action(action, label)
self.model = model
self._multitasks = []
def __reduce__(self):
return (Parser, (self.vocab, self.moves, self.model), None, None)
@ -332,11 +338,11 @@ cdef class Parser:
beam_density = self.cfg.get('beam_density', 0.0)
cdef Beam beam
if beam_width == 1:
states = self.parse_batch([doc], [doc.tensor])
states = self.parse_batch([doc])
self.set_annotations([doc], states)
return doc
else:
beam = self.beam_parse([doc], [doc.tensor],
beam = self.beam_parse([doc],
beam_width=beam_width, beam_density=beam_density)[0]
output = self.moves.get_beam_annot(beam)
state = <StateClass>beam.at(0)
@ -365,11 +371,11 @@ cdef class Parser:
cdef Beam beam
for docs in cytoolz.partition_all(batch_size, docs):
docs = list(docs)
tokvecs = [doc.tensor for doc in docs]
if beam_width == 1:
parse_states = self.parse_batch(docs, tokvecs)
parse_states = self.parse_batch(docs)
beams = []
else:
beams = self.beam_parse(docs, tokvecs,
beams = self.beam_parse(docs,
beam_width=beam_width, beam_density=beam_density)
parse_states = []
for beam in beams:
@ -377,7 +383,7 @@ cdef class Parser:
self.set_annotations(docs, parse_states)
yield from docs
def parse_batch(self, docs, tokvecses):
def parse_batch(self, docs):
cdef:
precompute_hiddens state2vec
StateClass state
@ -388,21 +394,15 @@ cdef class Parser:
int nr_class, nr_feat, nr_piece, nr_dim, nr_state
if isinstance(docs, Doc):
docs = [docs]
if isinstance(tokvecses, np.ndarray):
tokvecses = [tokvecses]
tokvecs = self.model[0].ops.flatten(tokvecses)
if USE_FINE_TUNE:
tokvecs = self.model[0].ops.flatten(self.model[0]((docs, tokvecses)))
cuda_stream = get_cuda_stream()
(tokvecs, bp_tokvecs), state2vec, vec2scores = self.get_batch_model(docs, cuda_stream,
0.0)
nr_state = len(docs)
nr_class = self.moves.n_moves
nr_dim = tokvecs.shape[1]
nr_feat = self.nr_feature
cuda_stream = get_cuda_stream()
state2vec, vec2scores = self.get_batch_model(nr_state, tokvecs,
cuda_stream, 0.0)
nr_piece = state2vec.nP
states = self.moves.init_batch(docs)
@ -418,21 +418,23 @@ cdef class Parser:
c_token_ids = <int*>token_ids.data
c_is_valid = <int*>is_valid.data
cdef int has_hidden = not getattr(vec2scores, 'is_noop', False)
cdef int nr_step
while not next_step.empty():
nr_step = next_step.size()
if not has_hidden:
for i in cython.parallel.prange(
next_step.size(), num_threads=6, nogil=True):
for i in cython.parallel.prange(nr_step, num_threads=6,
nogil=True):
self._parse_step(next_step[i],
feat_weights, nr_class, nr_feat, nr_piece)
else:
for i in range(next_step.size()):
for i in range(nr_step):
st = next_step[i]
st.set_context_tokens(&c_token_ids[i*nr_feat], nr_feat)
self.moves.set_valid(&c_is_valid[i*nr_class], st)
vectors = state2vec(token_ids[:next_step.size()])
scores = vec2scores(vectors)
c_scores = <float*>scores.data
for i in range(next_step.size()):
for i in range(nr_step):
st = next_step[i]
guess = arg_max_if_valid(
&c_scores[i*nr_class], &c_is_valid[i*nr_class], nr_class)
@ -445,18 +447,15 @@ cdef class Parser:
next_step.push_back(st)
return states
def beam_parse(self, docs, tokvecses, int beam_width=3, float beam_density=0.001):
def beam_parse(self, docs, int beam_width=3, float beam_density=0.001):
cdef Beam beam
cdef np.ndarray scores
cdef Doc doc
cdef int nr_class = self.moves.n_moves
cdef StateClass stcls, output
tokvecs = self.model[0].ops.flatten(tokvecses)
if USE_FINE_TUNE:
tokvecs = self.model[0].ops.flatten(self.model[0]((docs, tokvecses)))
cuda_stream = get_cuda_stream()
state2vec, vec2scores = self.get_batch_model(len(docs), tokvecs,
cuda_stream, 0.0)
(tokvecs, bp_tokvecs), state2vec, vec2scores = self.get_batch_model(docs, cuda_stream,
0.0)
beams = []
cdef int offset = 0
cdef int j = 0
@ -516,29 +515,24 @@ cdef class Parser:
free(scores)
free(token_ids)
def update(self, docs_tokvecs, golds, drop=0., sgd=None, losses=None):
def update(self, docs, golds, drop=0., sgd=None, losses=None):
if not any(self.moves.has_gold(gold) for gold in golds):
return None
if self.cfg.get('beam_width', 1) >= 2 and numpy.random.random() >= 0.5:
return self.update_beam(docs_tokvecs, golds,
return self.update_beam(docs, golds,
self.cfg['beam_width'], self.cfg['beam_density'],
drop=drop, sgd=sgd, losses=losses)
if losses is not None and self.name not in losses:
losses[self.name] = 0.
docs, tokvec_lists = docs_tokvecs
tokvecs = self.model[0].ops.flatten(tokvec_lists)
if isinstance(docs, Doc) and isinstance(golds, GoldParse):
docs = [docs]
golds = [golds]
if USE_FINE_TUNE:
my_tokvecs, bp_my_tokvecs = self.model[0].begin_update(docs_tokvecs, drop=drop)
tokvecs = self.model[0].ops.flatten(my_tokvecs)
cuda_stream = get_cuda_stream()
states, golds, max_steps = self._init_gold_batch(docs, golds)
state2vec, vec2scores = self.get_batch_model(len(states), tokvecs, cuda_stream,
0.0)
(tokvecs, bp_tokvecs), state2vec, vec2scores = self.get_batch_model(docs, cuda_stream,
drop)
todo = [(s, g) for (s, g) in zip(states, golds)
if not s.is_final() and g is not None]
if not todo:
@ -582,13 +576,9 @@ cdef class Parser:
if n_steps >= max_steps:
break
self._make_updates(d_tokvecs,
backprops, sgd, cuda_stream)
d_tokvecs = self.model[0].ops.unflatten(d_tokvecs, [len(d) for d in docs])
if USE_FINE_TUNE:
d_tokvecs = bp_my_tokvecs(d_tokvecs, sgd=sgd)
return d_tokvecs
bp_tokvecs, backprops, sgd, cuda_stream)
def update_beam(self, docs_tokvecs, golds, width=None, density=None,
def update_beam(self, docs, golds, width=None, density=None,
drop=0., sgd=None, losses=None):
if not any(self.moves.has_gold(gold) for gold in golds):
return None
@ -600,26 +590,20 @@ cdef class Parser:
density = self.cfg.get('beam_density', 0.0)
if losses is not None and self.name not in losses:
losses[self.name] = 0.
docs, tokvecs = docs_tokvecs
lengths = [len(d) for d in docs]
assert min(lengths) >= 1
tokvecs = self.model[0].ops.flatten(tokvecs)
if USE_FINE_TUNE:
my_tokvecs, bp_my_tokvecs = self.model[0].begin_update(docs_tokvecs, drop=drop)
tokvecs += self.model[0].ops.flatten(my_tokvecs)
states = self.moves.init_batch(docs)
for gold in golds:
self.moves.preprocess_gold(gold)
cuda_stream = get_cuda_stream()
state2vec, vec2scores = self.get_batch_model(len(states), tokvecs, cuda_stream, 0.0)
(tokvecs, bp_tokvecs), state2vec, vec2scores = self.get_batch_model(docs, cuda_stream, drop)
states_d_scores, backprops = _beam_utils.update_beam(self.moves, self.nr_feature, 500,
states, tokvecs, golds,
states, golds,
state2vec, vec2scores,
width, density,
sgd=sgd, drop=drop, losses=losses)
drop=drop, losses=losses)
backprop_lower = []
cdef float batch_size = len(docs)
for i, d_scores in enumerate(states_d_scores):
@ -637,11 +621,7 @@ cdef class Parser:
else:
backprop_lower.append((ids, d_vector, bp_vectors))
d_tokvecs = self.model[0].ops.allocate(tokvecs.shape)
self._make_updates(d_tokvecs, backprop_lower, sgd, cuda_stream)
d_tokvecs = self.model[0].ops.unflatten(d_tokvecs, lengths)
if USE_FINE_TUNE:
d_tokvecs = bp_my_tokvecs(d_tokvecs, sgd=sgd)
return d_tokvecs
self._make_updates(d_tokvecs, bp_tokvecs, backprop_lower, sgd, cuda_stream)
def _init_gold_batch(self, whole_docs, whole_golds):
"""Make a square batch, of length equal to the shortest doc. A long
@ -679,7 +659,7 @@ cdef class Parser:
max_moves = max(max_moves, len(oracle_actions))
return states, golds, max_moves
def _make_updates(self, d_tokvecs, backprops, sgd, cuda_stream=None):
def _make_updates(self, d_tokvecs, bp_tokvecs, backprops, sgd, cuda_stream=None):
# Tells CUDA to block, so our async copies complete.
if cuda_stream is not None:
cuda_stream.synchronize()
@ -690,6 +670,7 @@ cdef class Parser:
d_state_features *= mask.reshape(ids.shape + (1,))
self.model[0].ops.scatter_add(d_tokvecs, ids * mask,
d_state_features)
bp_tokvecs(d_tokvecs, sgd=sgd)
@property
def move_names(self):
@ -699,11 +680,12 @@ cdef class Parser:
names.append(name)
return names
def get_batch_model(self, batch_size, tokvecs, stream, dropout):
_, lower, upper = self.model
state2vec = precompute_hiddens(batch_size, tokvecs,
lower, stream, drop=dropout)
return state2vec, upper
def get_batch_model(self, docs, stream, dropout):
tok2vec, lower, upper = self.model
tokvecs, bp_tokvecs = tok2vec.begin_update(docs, drop=dropout)
state2vec = precompute_hiddens(len(docs), tokvecs,
lower, stream, drop=0.0)
return (tokvecs, bp_tokvecs), state2vec, upper
nr_feature = 8
@ -766,7 +748,7 @@ cdef class Parser:
# order, or the model goes out of synch
self.cfg.setdefault('extra_labels', []).append(label)
def begin_training(self, gold_tuples, **cfg):
def begin_training(self, gold_tuples, pipeline=None, **cfg):
if 'model' in cfg:
self.model = cfg['model']
gold_tuples = nonproj.preprocess_training_data(gold_tuples)
@ -775,9 +757,22 @@ cdef class Parser:
for label in labels:
self.moves.add_action(action, label)
if self.model is True:
cfg['pretrained_dims'] = self.vocab.vectors_length
self.model, cfg = self.Model(self.moves.n_moves, **cfg)
self.init_multitask_objectives(gold_tuples, pipeline, **cfg)
link_vectors_to_models(self.vocab)
self.cfg.update(cfg)
def init_multitask_objectives(self, gold_tuples, pipeline, **cfg):
'''Set up models for secondary objectives, to benefit from multi-task
learning. This method is intended to be overridden by subclasses.
For instance, the dependency parser can benefit from sharing
an input representation with a label prediction model. These auxiliary
models are discarded after training.
'''
pass
def preprocess_gold(self, docs_golds):
for doc, gold in docs_golds:
yield doc, gold
@ -813,6 +808,7 @@ cdef class Parser:
if 'model' not in exclude:
path = util.ensure_path(path)
if self.model is True:
self.cfg['pretrained_dims'] = self.vocab.vectors_length
self.model, cfg = self.Model(**self.cfg)
else:
cfg = {}
@ -835,7 +831,7 @@ cdef class Parser:
('upper_model', lambda: self.model[2].to_bytes()),
('vocab', lambda: self.vocab.to_bytes()),
('moves', lambda: self.moves.to_bytes(strings=False)),
('cfg', lambda: ujson.dumps(self.cfg))
('cfg', lambda: json.dumps(self.cfg, indent=2, sort_keys=True))
))
if 'model' in exclude:
exclude['tok2vec_model'] = True
@ -848,7 +844,7 @@ cdef class Parser:
deserializers = OrderedDict((
('vocab', lambda b: self.vocab.from_bytes(b)),
('moves', lambda b: self.moves.from_bytes(b, strings=False)),
('cfg', lambda b: self.cfg.update(ujson.loads(b))),
('cfg', lambda b: self.cfg.update(json.loads(b))),
('tok2vec_model', lambda b: None),
('lower_model', lambda b: None),
('upper_model', lambda b: None)
@ -856,9 +852,11 @@ cdef class Parser:
msg = util.from_bytes(bytes_data, deserializers, exclude)
if 'model' not in exclude:
if self.model is True:
self.model, cfg = self.Model(self.moves.n_moves)
self.model, cfg = self.Model(**self.cfg)
cfg['pretrained_dims'] = self.vocab.vectors_length
else:
cfg = {}
cfg['pretrained_dims'] = self.vocab.vectors_length
if 'tok2vec_model' in msg:
self.model[0].from_bytes(msg['tok2vec_model'])
if 'lower_model' in msg:

View File

@ -148,7 +148,7 @@ cdef class TransitionSystem:
def add_action(self, int action, label_name):
cdef attr_t label_id
if not isinstance(label_name, int):
if not isinstance(label_name, (int, long)):
label_id = self.strings.add(label_name)
else:
label_id = label_name

View File

@ -12,7 +12,7 @@ from .. import util
_languages = ['bn', 'da', 'de', 'en', 'es', 'fi', 'fr', 'he', 'hu', 'id',
'it', 'nb', 'nl', 'pl', 'pt', 'sv', 'xx']
'it', 'nb', 'nl', 'pl', 'pt', 'sv', 'th', 'xx']
_models = {'en': ['en_core_web_sm'],
'de': ['de_core_news_md'],
'fr': ['fr_depvec_web_lg'],
@ -108,6 +108,11 @@ def he_tokenizer():
def nb_tokenizer():
return util.get_lang_class('nb').Defaults.create_tokenizer()
@pytest.fixture
def th_tokenizer():
pythainlp = pytest.importorskip("pythainlp")
return util.get_lang_class('th').Defaults.create_tokenizer()
@pytest.fixture
def stringstore():

View File

@ -67,12 +67,6 @@ def test_tokenizer_splits_uneven_wrap_interact(de_tokenizer, text):
assert len(tokens) == 4
@pytest.mark.parametrize('text', ["blau-rot"])
def test_tokenizer_splits_hyphens(de_tokenizer, text):
tokens = de_tokenizer(text)
assert len(tokens) == 3
@pytest.mark.parametrize('text', ["0.1-13.5", "0.0-0.1", "103.27-300"])
def test_tokenizer_splits_numeric_range(de_tokenizer, text):
tokens = de_tokenizer(text)
@ -100,17 +94,21 @@ def test_tokenizer_splits_ellipsis_infix(de_tokenizer, text):
assert len(tokens) == 3
@pytest.mark.parametrize('text', ['Islam-Konferenz', 'Ost-West-Konflikt'])
def test_tokenizer_keeps_hyphens(de_tokenizer, text):
tokens = de_tokenizer(text)
assert len(tokens) == 1
def test_tokenizer_splits_double_hyphen_infix(de_tokenizer):
tokens = de_tokenizer("Viele Regeln--wie die Bindestrich-Regeln--sind kompliziert.")
assert len(tokens) == 12
assert len(tokens) == 10
assert tokens[0].text == "Viele"
assert tokens[1].text == "Regeln"
assert tokens[2].text == "--"
assert tokens[3].text == "wie"
assert tokens[4].text == "die"
assert tokens[5].text == "Bindestrich"
assert tokens[6].text == "-"
assert tokens[7].text == "Regeln"
assert tokens[8].text == "--"
assert tokens[9].text == "sind"
assert tokens[10].text == "kompliziert"
assert tokens[5].text == "Bindestrich-Regeln"
assert tokens[6].text == "--"
assert tokens[7].text == "sind"
assert tokens[8].text == "kompliziert"

View File

@ -25,15 +25,15 @@ Umfang kläglich dünnen Beine flimmerten ihm hilflos vor den Augen.
assert len(tokens) == 109
@pytest.mark.parametrize('text,length', [
("Donaudampfschifffahrtsgesellschaftskapitänsanwärterposten", 1),
("Rindfleischetikettierungsüberwachungsaufgabenübertragungsgesetz", 1),
("Kraftfahrzeug-Haftpflichtversicherung", 3),
("Vakuum-Mittelfrequenz-Induktionsofen", 5)
@pytest.mark.parametrize('text', [
"Donaudampfschifffahrtsgesellschaftskapitänsanwärterposten",
"Rindfleischetikettierungsüberwachungsaufgabenübertragungsgesetz",
"Kraftfahrzeug-Haftpflichtversicherung",
"Vakuum-Mittelfrequenz-Induktionsofen"
])
def test_tokenizer_handles_long_words(de_tokenizer, text, length):
def test_tokenizer_handles_long_words(de_tokenizer, text):
tokens = de_tokenizer(text)
assert len(tokens) == length
assert len(tokens) == 1
@pytest.mark.parametrize('text,length', [

View File

View File

@ -0,0 +1,13 @@
# coding: utf8
from __future__ import unicode_literals
import pytest
TOKENIZER_TESTS = [
("คุณรักผมไหม", ['คุณ', 'รัก', 'ผม', 'ไหม'])
]
@pytest.mark.parametrize('text,expected_tokens', TOKENIZER_TESTS)
def test_thai_tokenizer(th_tokenizer, text, expected_tokens):
tokens = [token.text for token in th_tokenizer(text)]
assert tokens == expected_tokens

View File

@ -26,7 +26,7 @@ def arc_eager(vocab):
@pytest.fixture
def tok2vec():
return Tok2Vec(8, 100, preprocess=doc2feats())
return Tok2Vec(8, 100)
@pytest.fixture
@ -61,33 +61,22 @@ def test_predict_doc(parser, tok2vec, model, doc):
parser(doc)
def test_update_doc(parser, tok2vec, model, doc, gold):
def test_update_doc(parser, model, doc, gold):
parser.model = model
tokvecs, bp_tokvecs = tok2vec.begin_update([doc])
d_tokvecs = parser.update(([doc], tokvecs), [gold])
assert d_tokvecs[0].shape == tokvecs[0].shape
def optimize(weights, gradient, key=None):
weights -= 0.001 * gradient
bp_tokvecs(d_tokvecs, sgd=optimize)
assert d_tokvecs[0].sum() == 0.
parser.update([doc], [gold], sgd=optimize)
def test_predict_doc_beam(parser, tok2vec, model, doc):
doc.tensor = tok2vec([doc])[0]
def test_predict_doc_beam(parser, model, doc):
parser.model = model
parser(doc, beam_width=32, beam_density=0.001)
for word in doc:
print(word.text, word.head, word.dep_)
def test_update_doc_beam(parser, tok2vec, model, doc, gold):
def test_update_doc_beam(parser, model, doc, gold):
parser.model = model
tokvecs, bp_tokvecs = tok2vec.begin_update([doc])
d_tokvecs = parser.update_beam(([doc], tokvecs), [gold])
assert d_tokvecs[0].shape == tokvecs[0].shape
def optimize(weights, gradient, key=None):
weights -= 0.001 * gradient
bp_tokvecs(d_tokvecs, sgd=optimize)
assert d_tokvecs[0].sum() == 0.
parser.update_beam([doc], [gold], sgd=optimize)

View File

@ -0,0 +1,8 @@
import pytest
@pytest.mark.models('en')
def test_issue1305(EN):
'''Test lemmatization of English VBZ'''
assert EN.vocab.morphology.lemmatizer('works', 'verb') == set(['work'])
doc = EN(u'This app works well')
assert doc[2].lemma_ == 'work'

View File

@ -0,0 +1,14 @@
from __future__ import unicode_literals
import pytest
from ...language import Language
def test_issue1380_empty_string():
nlp = Language()
doc = nlp('')
assert len(doc) == 0
@pytest.mark.models('en')
def test_issue1380_en(EN):
doc = EN('')
assert len(doc) == 0

View File

@ -9,11 +9,14 @@ import pytest
@pytest.mark.models('en')
def test_issue429(EN):
def merge_phrases(matcher, doc, i, matches):
if i != len(matches) - 1:
return None
spans = [(ent_id, ent_id, doc[start:end]) for ent_id, start, end in matches]
for ent_id, label, span in spans:
span.merge('NNP' if label else span.root.tag_, span.text, EN.vocab.strings[label])
if i != len(matches) - 1:
return None
spans = [(ent_id, ent_id, doc[start:end]) for ent_id, start, end in matches]
for ent_id, label, span in spans:
span.merge(
tag=('NNP' if label else span.root.tag_),
lemma=span.text,
label='PERSON')
doc = EN('a')
matcher = Matcher(EN.vocab)

View File

@ -11,7 +11,7 @@ import pytest
def taggers(en_vocab):
tagger1 = Tagger(en_vocab)
tagger2 = Tagger(en_vocab)
tagger1.model = tagger1.Model(8, 8)
tagger1.model = tagger1.Model(8)
tagger2.model = tagger1.model
return (tagger1, tagger2)

View File

@ -6,6 +6,16 @@ from ...strings import StringStore
import pytest
def test_string_hash(stringstore):
'''Test that string hashing is stable across platforms'''
ss = stringstore
assert ss.add('apple') == 8566208034543834098
heart = '\U0001f499'
print(heart)
h = ss.add(heart)
assert h == 11841826740069053588
def test_stringstore_from_api_docs(stringstore):
apple_hash = stringstore.add('apple')
assert apple_hash == 8566208034543834098

View File

@ -34,7 +34,6 @@ def test_matcher_from_api_docs(en_vocab):
assert len(patterns[0])
@pytest.mark.xfail
def test_matcher_from_usage_docs(en_vocab):
text = "Wow 😀 This is really cool! 😂 😂"
doc = get_doc(en_vocab, words=text.split(' '))
@ -46,7 +45,8 @@ def test_matcher_from_usage_docs(en_vocab):
if doc.vocab.strings[match_id] == 'HAPPY':
doc.sentiment += 0.1
span = doc[start : end]
token = span.merge(norm='happy emoji')
token = span.merge()
token.vocab[token.text].norm_ = 'happy emoji'
matcher = Matcher(en_vocab)
matcher.add('HAPPY', label_sentiment, *pos_patterns)
@ -98,11 +98,11 @@ def test_matcher_match_multi(matcher):
(doc.vocab.strings['Java'], 5, 6)]
@pytest.mark.xfail
def test_matcher_phrase_matcher(en_vocab):
words = ["Google", "Now"]
doc = get_doc(en_vocab, words)
matcher = PhraseMatcher(en_vocab, [doc])
matcher = PhraseMatcher(en_vocab)
matcher.add('COMPANY', None, doc)
words = ["I", "like", "Google", "Now", "best"]
doc = get_doc(en_vocab, words)
assert len(matcher(doc)) == 1

View File

@ -9,7 +9,8 @@ from .util import get_doc
from pathlib import Path
import pytest
from thinc.neural import Maxout, Softmax
from thinc.neural._classes.maxout import Maxout
from thinc.neural._classes.softmax import Softmax
from thinc.api import chain

View File

@ -1,6 +1,7 @@
# coding: utf-8
from __future__ import unicode_literals
import sys
import pytest
@ -37,9 +38,10 @@ def test_tokenizer_excludes_false_pos_emoticons(tokenizer, text, length):
tokens = tokenizer(text)
assert len(tokens) == length
@pytest.mark.parametrize('text,length', [('can you still dunk?🍕🍔😵LOL', 8),
('i💙you', 3), ('🤘🤘yay!', 4)])
def test_tokenizer_handles_emoji(tokenizer, text, length):
tokens = tokenizer(text)
assert len(tokens) == length
# These break on narrow unicode builds, e.g. Windows
if sys.maxunicode >= 1114111:
tokens = tokenizer(text)
assert len(tokens) == length

View File

@ -54,7 +54,7 @@ cdef class Doc:
cdef public object noun_chunks_iterator
cdef int push_back(self, LexemeOrToken lex_or_tok, bint trailing_space) except -1
cdef int push_back(self, LexemeOrToken lex_or_tok, bint has_space) except -1
cpdef np.ndarray to_array(self, object features)

View File

@ -660,7 +660,7 @@ cdef class Doc:
"""
with path.open('rb') as file_:
bytes_data = file_.read()
self.from_bytes(bytes_data, **exclude)
return self.from_bytes(bytes_data, **exclude)
def to_bytes(self, **exclude):
"""Serialize, i.e. export the document contents to a binary string.

View File

@ -3,7 +3,7 @@ from __future__ import unicode_literals, print_function
import os
import ujson
import pip
import pkg_resources
import importlib
import regex as re
from pathlib import Path
@ -14,6 +14,7 @@ import numpy
import io
import dill
from collections import OrderedDict
from thinc.neural._classes.model import Model
import msgpack
import msgpack_numpy
@ -180,9 +181,10 @@ def is_package(name):
name (unicode): Name of package.
RETURNS (bool): True if installed package, False if not.
"""
packages = pip.get_installed_distributions()
name = name.lower() # compare package name against lowercase name
packages = pkg_resources.working_set.by_key.keys()
for package in packages:
if package.project_name.replace('-', '_') == name:
if package.lower().replace('-', '_') == name:
return True
return False
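A quick usage sketch for the check above (the package names are hypothetical examples):
from spacy import util

util.is_package('en_core_web_sm')   # True only if installed as a Python package
util.is_package('no_such_package')  # False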
@ -193,6 +195,7 @@ def get_package_path(name):
name (unicode): Package name.
RETURNS (Path): Path to installed package.
"""
name = name.lower() # use lowercase version to be safe
# Here we're importing the module just to find it. This is worryingly
# indirect, but it's otherwise very difficult to find the package.
pkg = importlib.import_module(name)
@ -557,3 +560,17 @@ def minify_html(html):
RETURNS (unicode): "Minified" HTML.
"""
return html.strip().replace(' ', '').replace('\n', '')
def use_gpu(gpu_id):
try:
import cupy.cuda.device
except ImportError:
return None
from thinc.neural.ops import CupyOps
device = cupy.cuda.device.Device(gpu_id)
device.use()
Model.ops = CupyOps()
Model.Ops = CupyOps
return device
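A possible call site for the helper above (illustrative; it needs CuPy and a CUDA device to have any effect):
from spacy import util

device = util.use_gpu(0)   # returns None when CuPy cannot be imported
if device is None:
    print('CuPy not available, staying on the CPU')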

View File

@ -6,6 +6,8 @@ import msgpack
import msgpack_numpy
msgpack_numpy.patch()
cimport numpy as np
from thinc.neural.util import get_array_module
from thinc.neural._classes.model import Model
from .typedefs cimport attr_t
from .strings cimport StringStore
@ -14,15 +16,29 @@ from .compat import basestring_
cdef class Vectors:
'''Store, save and load word vectors.'''
'''Store, save and load word vectors.
Vectors data is kept in the vectors.data attribute, which should be an
instance of numpy.ndarray (for CPU vectors)
or cupy.ndarray (for GPU vectors).
vectors.key2row is a dictionary mapping word hashes to rows
in the vectors.data table. The array `vectors.keys` keeps
the keys in order, such that keys[vectors.key2row[key]] == key.
'''
cdef public object data
cdef readonly StringStore strings
cdef public object key2row
cdef public object keys
cdef public int i
def __init__(self, strings, data_or_width):
self.strings = StringStore()
def __init__(self, strings, data_or_width=0):
if isinstance(strings, StringStore):
self.strings = strings
else:
self.strings = StringStore()
for string in strings:
self.strings.add(string)
if isinstance(data_or_width, int):
self.data = data = numpy.zeros((len(strings), data_or_width),
dtype='f')
@ -37,6 +53,11 @@ cdef class Vectors:
return (Vectors, (self.strings, self.data))
def __getitem__(self, key):
'''Get a vector by key. If key is a string, it is hashed
to an integer ID using the vectors.strings table.
If the integer key is not found in the table, a KeyError is raised.
'''
if isinstance(key, basestring):
key = self.strings[key]
i = self.key2row[key]
@ -46,23 +67,30 @@ cdef class Vectors:
return self.data[i]
def __setitem__(self, key, vector):
'''Set a vector for the given key. If key is a string, it is hashed
to an integer ID using the vectors.strings table.
'''
if isinstance(key, basestring):
key = self.strings.add(key)
i = self.key2row[key]
self.data[i] = vector
def __iter__(self):
'''Yield vectors from the table.'''
yield from self.data
def __len__(self):
'''Return the number of vectors that have been assigned.'''
return self.i
def __contains__(self, key):
'''Check whether a key has a vector entry in the table.'''
if isinstance(key, basestring_):
key = self.strings[key]
return key in self.key2row
def add(self, key, vector=None):
'''Add a key to the table, optionally setting a vector value as well.'''
if isinstance(key, basestring_):
key = self.strings.add(key)
if key not in self.key2row:
@ -80,7 +108,9 @@ cdef class Vectors:
return i
def items(self):
for i, string in enumerate(self.strings):
'''Iterate over (string key, vector) pairs, in order.'''
for i, key in enumerate(self.keys):
string = self.strings[key]
yield string, self.data[i]
@property
@ -118,9 +148,14 @@ cdef class Vectors:
self.data
def to_disk(self, path, **exclude):
xp = get_array_module(self.data)
if xp is numpy:
save_array = lambda arr, file_: xp.save(file_, arr, allow_pickle=False)
else:
save_array = lambda arr, file_: xp.save(file_, arr)
serializers = OrderedDict((
('vectors', lambda p: numpy.save(p.open('wb'), self.data, allow_pickle=False)),
('keys', lambda p: numpy.save(p.open('wb'), self.keys, allow_pickle=False)),
('vectors', lambda p: save_array(self.data, p.open('wb'))),
('keys', lambda p: xp.save(p.open('wb'), self.keys))
))
return util.to_disk(path, serializers, exclude)
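A toy illustration (plain numpy, no spaCy objects) of the invariant stated in the Vectors docstring above, keys[key2row[key]] == key; the hash values are made up:
import numpy

data = numpy.zeros((2, 3), dtype='f')               # one row per vector
keys = numpy.asarray([1112, 3379], dtype='uint64')  # stand-ins for word hashes
key2row = {int(k): i for i, k in enumerate(keys)}

for key, row in key2row.items():
    assert int(keys[row]) == key                    # the documented invariant
    vector = data[row]                              # row lookup, as __getitem__ does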
@ -133,8 +168,9 @@ cdef class Vectors:
self.key2row[key] = i
def load_vectors(path):
xp = Model.ops.xp
if path.exists():
self.data = numpy.load(path)
self.data = xp.load(path)
serializers = OrderedDict((
('keys', load_keys),

View File

@ -27,6 +27,7 @@ from .vectors import Vectors
from . import util
from . import attrs
from . import symbols
from ._ml import link_vectors_to_models
cdef class Vocab:
@ -65,7 +66,7 @@ cdef class Vocab:
self.strings.add(name)
self.lex_attr_getters = lex_attr_getters
self.morphology = Morphology(self.strings, tag_map, lemmatizer)
self.vectors = Vectors(self.strings, 300)
self.vectors = Vectors(self.strings)
property lang:
def __get__(self):
@ -261,7 +262,7 @@ cdef class Vocab:
Words can be looked up by string or int ID.
RETURNS:
A word vector. Size and shape determed by the
A word vector. Size and shape determined by the
vocab.vectors instance. Usually, a numpy ndarray
of shape (300,) and dtype float32.
@ -323,6 +324,7 @@ cdef class Vocab:
self.lexemes_from_bytes(file_.read())
if self.vectors is not None:
self.vectors.from_disk(path, exclude='strings.json')
link_vectors_to_models(self)
return self
def to_bytes(self, **exclude):
@ -436,6 +438,7 @@ def unpickle_vocab(sstore, morphology, data_dir,
vocab.lex_attr_getters = lex_attr_getters
vocab.lexemes_from_bytes(lexemes_data)
vocab.length = length
link_vectors_to_models(vocab)
return vocab

View File

@ -17,6 +17,7 @@ fi
if [ "${VIA}" == "compile" ]; then
pip install -r requirements.txt
python setup.py build_ext --inplace
pip install -e .
fi

View File

@ -8,4 +8,5 @@ include _includes/_mixins
| does not exist!
h2.c-landing__title.u-heading-3.u-padding-small
a(href="javascript:history.go(-1)") Click here to go back.
+button(false, true, "secondary-light")(href="javascript:history.go(-1)")
| Click here to go back

View File

@ -3,24 +3,22 @@
"landing": true,
"logos": [
{
"quora": [ "https://www.quora.com", 150 ],
"chartbeat": [ "https://chartbeat.com", 200 ],
"duedil": [ "https://www.duedil.com", 150 ],
"stitchfix": [ "https://www.stitchfix.com", 190 ]
"airbnb": [ "https://www.airbnb.com", 150, 45],
"quora": [ "https://www.quora.com", 120, 34 ],
"retriever": [ "https://www.retriever.no", 150, 33 ],
"stitchfix": [ "https://www.stitchfix.com", 150, 18 ]
},
{
"wayblazer": [ "http://wayblazer.com", 200 ],
"indico": [ "https://indico.io", 150 ],
"chattermill": [ "https://chattermill.io", 175 ],
"turi": [ "https://turi.com", 150 ],
"kip": [ "http://kipthis.com", 70 ]
},
"chartbeat": [ "https://chartbeat.com", 180, 25 ],
"allenai": [ "https://allenai.org", 220, 37 ]
}
],
"features": [
{
"socrata": [ "https://www.socrata.com", 150 ],
"cytora": [ "http://www.cytora.com", 125 ],
"signaln": [ "http://signaln.com", 150 ],
"wonderflow": [ "http://www.wonderflow.co", 200 ],
"synapsify": [ "http://www.gosynapsify.com", 150 ]
"thoughtworks": ["https://www.thoughtworks.com/radar/tools", 150, 28],
"wapo": ["https://www.washingtonpost.com/news/wonk/wp/2016/05/18/googles-new-artificial-intelligence-cant-understand-these-sentences-can-you/", 100, 77],
"venturebeat": ["https://venturebeat.com/2017/01/27/4-ai-startups-that-analyze-customer-reviews/", 150, 19],
"microsoft": ["https://www.microsoft.com/developerblog/2016/09/13/training-a-classifier-for-relation-extraction-from-medical-literature/", 130, 28]
}
]
},
@ -34,7 +32,24 @@
"landing": true
},
"announcement" : {
"title": "Important Announcement"
"styleguide": {
"title": "Styleguide",
"sidebar": {
"Styleguide": { "": "styleguide" },
"Resources": {
"Website Source": "https://github.com/explosion/spacy/tree/master/website",
"Contributing Guide": "https://github.com/explosion/spaCy/blob/master/CONTRIBUTING.md"
}
},
"menu": {
"Introduction": "intro",
"Logo": "logo",
"Colors": "colors",
"Typography": "typography",
"Elements": "elements",
"Components": "components",
"Embeds": "embeds",
"Markup Reference": "markup"
}
}
}

View File

@ -11,12 +11,9 @@
"COMPANY": "Explosion AI",
"COMPANY_URL": "https://explosion.ai",
"DEMOS_URL": "https://demos.explosion.ai",
"MODELS_REPO": "explosion/spacy-models",
"SPACY_VERSION": "1.8",
"LATEST_NEWS": {
"url": "https://github.com/explosion/spaCy/releases/tag/v2.0.0-alpha",
"title": "Test spaCy v2.0.0 alpha!"
},
"SPACY_VERSION": "2.0",
"SOCIAL": {
"twitter": "spacy_io",
@ -27,25 +24,23 @@
},
"NAVIGATION": {
"Home": "/",
"Usage": "/docs/usage",
"Reference": "/docs/api",
"Demos": "/docs/usage/showcase",
"Blog": "https://explosion.ai/blog"
"Usage": "/usage",
"Models": "/models",
"API": "/api"
},
"FOOTER": {
"spaCy": {
"Usage": "/docs/usage",
"API Reference": "/docs/api",
"Tutorials": "/docs/usage/tutorials",
"Showcase": "/docs/usage/showcase"
"Usage": "/usage",
"Models": "/models",
"API Reference": "/api",
"Resources": "/usage/resources"
},
"Support": {
"Issue Tracker": "https://github.com/explosion/spaCy/issues",
"StackOverflow": "http://stackoverflow.com/questions/tagged/spacy",
"Reddit usergroup": "https://www.reddit.com/r/spacynlp/",
"Gitter chat": "https://gitter.im/explosion/spaCy"
"Reddit Usergroup": "https://www.reddit.com/r/spacynlp/",
"Gitter Chat": "https://gitter.im/explosion/spaCy"
},
"Connect": {
"Twitter": "https://twitter.com/spacy_io",
@ -74,21 +69,11 @@
{"id": "venv", "title": "virtualenv", "help": "Use a virtual environment and install spaCy into a user directory" },
{"id": "gpu", "title": "GPU", "help": "Run spaCy on GPU to make it faster. Requires an NVDIA graphics card with CUDA 2+. See section below for more info."}]
},
{ "id": "model", "title": "Models", "multiple": true, "options": [
{ "id": "en", "title": "English", "meta": "50MB" },
{ "id": "de", "title": "German", "meta": "645MB" },
{ "id": "fr", "title": "French", "meta": "1.33GB" },
{ "id": "es", "title": "Spanish", "meta": "377MB"}]
}
{ "id": "model", "title": "Models", "multiple": true }
],
"QUICKSTART_MODELS": [
{ "id": "lang", "title": "Language", "options": [
{ "id": "en", "title": "English", "checked": true },
{ "id": "de", "title": "German" },
{ "id": "fr", "title": "French" },
{ "id": "es", "title": "Spanish" }]
},
{ "id": "lang", "title": "Language"},
{ "id": "load", "title": "Loading style", "options": [
{ "id": "spacy", "title": "Use spacy.load()", "checked": true, "help": "Use spaCy's built-in loader to load the model by name." },
{ "id": "module", "title": "Import as module", "help": "Import the model explicitly as a Python module." }]
@ -98,50 +83,15 @@
}
],
"MODELS": {
"en": [
{ "id": "en_core_web_sm", "lang": "English", "feats": [1, 1, 1, 1], "size": "50 MB", "license": "CC BY-SA", "def": true },
{ "id": "en_core_web_md", "lang": "English", "feats": [1, 1, 1, 1], "size": "1 GB", "license": "CC BY-SA" },
{ "id": "en_depent_web_md", "lang": "English", "feats": [1, 1, 1, 0], "size": "328 MB", "license": "CC BY-SA" },
{ "id": "en_vectors_glove_md", "lang": "English", "feats": [1, 0, 0, 1], "size": "727 MB", "license": "CC BY-SA" }
],
"de": [
{ "id": "de_core_news_md", "lang": "German", "feats": [1, 1, 1, 1], "size": "645 MB", "license": "CC BY-SA" }
],
"fr": [
{ "id": "fr_depvec_web_lg", "lang": "French", "feats": [1, 1, 0, 1], "size": "1.33 GB", "license": "CC BY-NC" }
],
"es": [
{ "id": "es_core_web_md", "lang": "Spanish", "feats": [1, 1, 1, 1], "size": "377 MB", "license": "CC BY-SA"}
]
},
"EXAMPLE_SENTENCES": {
"en": "This is a sentence.",
"de": "Dies ist ein Satz.",
"fr": "C'est une phrase.",
"es": "Esto es una frase."
},
"ALPHA": true,
"V_CSS": "1.6",
"V_JS": "1.2",
"V_CSS": "2.0",
"V_JS": "2.0",
"DEFAULT_SYNTAX": "python",
"ANALYTICS": "UA-58931649-1",
"MAILCHIMP": {
"user": "spacy.us12",
"id": "83b0498b1e7fa3c91ce68c3f1",
"list": "89ad33e698"
},
"BADGES": {
"pipy": {
"badge": "https://img.shields.io/pypi/v/spacy.svg?style=flat-square",
"link": "https://pypi.python.org/pypi/spacy"
},
"conda": {
"badge": "https://anaconda.org/conda-forge/spacy/badges/version.svg",
"link": "https://anaconda.org/conda-forge/spacy"
}
}
}
}

View File

@ -1,8 +1,6 @@
//- 💫 INCLUDES > FOOTER
include _mixins
footer.o-footer.u-text.u-border-dotted
footer.o-footer.u-text
+grid.o-content
each group, label in FOOTER
+grid-col("quarter")
@ -13,18 +11,18 @@ footer.o-footer.u-text.u-border-dotted
li
+a(url)=item
if SECTION != "docs"
if SECTION == "index"
+grid-col("quarter")
include _newsletter
if SECTION == "docs"
if SECTION != "index"
.o-content.o-block.u-border-dotted
include _newsletter
.o-inline-list.u-text-center.u-text-tiny.u-color-subtle
span &copy; 2016-#{new Date().getFullYear()} #[+a(COMPANY_URL, true)=COMPANY]
+a(COMPANY_URL, true)
+svg("graphics", "explosion", 45).o-icon.u-color-theme.u-grayscale
+a(COMPANY_URL, true)(aria-label="Explosion AI")
+icon("explosion", 45).o-icon.u-color-theme.u-grayscale
+a(COMPANY_URL + "/legal", true) Legal / Imprint

View File

@ -1,35 +1,71 @@
//- 💫 INCLUDES > FUNCTIONS
//- More descriptive variables for current.path and current.source
//- Descriptive variables, available in the global scope
- CURRENT = current.source
- SECTION = current.path[0]
- SUBSECTION = current.path[1]
- LANGUAGES = public.models._data.LANGUAGES
- MODELS = public.models._data.MODELS
- CURRENT_MODELS = MODELS[current.source] || []
- MODEL_COUNT = Object.keys(MODELS).map(m => Object.keys(MODELS[m]).length).reduce((a, b) => a + b)
- MODEL_LANG_COUNT = Object.keys(MODELS).length
- LANG_COUNT = Object.keys(LANGUAGES).length
- MODEL_META = public.models._data.MODEL_META
- MODEL_LICENSES = public.models._data.MODEL_LICENSES
- MODEL_ACCURACY = public.models._data.MODEL_ACCURACY
- EXAMPLE_SENTENCES = public.models._data.EXAMPLE_SENTENCES
- IS_PAGE = (SECTION != "index") && !landing
- IS_MODELS = (SECTION == "models" && LANGUAGES[current.source])
- HAS_MODELS = IS_MODELS && CURRENT_MODELS.length
//- Add prefixes to items of an array (for modifier CSS classes)
array - [array] list of class names or options, e.g. ["foot"]
prefix - [string] prefix to add to each class, e.g. "c-table__row"
RETURNS - [array] list of modified class names
- function prefixArgs(array, prefix) {
- return array.map(function(arg) {
- return prefix + '--' + arg;
- }).join(' ');
- return array.map(arg => prefix + '--' + arg).join(' ');
- }
//- Convert API paths (semi-temporary fix for renamed sections)
path - [string] link path supplied to +api mixin
RETURNS - [string] new link path to correct location
- function convertAPIPath(path) {
- if (path.startsWith('spacy#') || path.startsWith('displacy#') || path.startsWith('util#')) {
- var comps = path.split('#');
- return "top-level#" + comps[0] + '.' + comps[1];
- }
- else if (path.startsWith('cli#')) {
- return "top-level#" + path.split('#')[1];
- }
- return path;
- }
//- Get model components from ID. Components can then be looked up in LANGUAGES
and MODEL_META respectively, to get their human-readable form.
id - [string] model ID, e.g. "en_core_web_sm"
RETURNS - [object] object keyed by components lang, type, genre and size
- function getModelComponents(id) {
- var comps = id.split('_');
- return {'lang': comps[0], 'type': comps[1], 'genre': comps[2], 'size': comps[3]}
- }
//- Generate GitHub links
repo - [string] name of repo owned by explosion
filepath - [string] logical path to file relative to repository root
branch - [string] optional branch, defaults to "master"
RETURNS - [string] the correct link to the file on GitHub
- function gh(repo, filepath, branch) {
- var branch = ALPHA ? 'develop' : branch
- return 'https://github.com/' + SOCIAL.github + '/' + repo + (filepath ? '/blob/' + (branch || 'master') + '/' + filepath : '' );
- }
//- Get social images
- function getSocialImg() {
- var base = SITE_URL + '/assets/img/social/preview_'
- var image = ALPHA ? 'alpha' : 'default'
- if (preview) image = preview
- else if (SECTION == 'docs' && !ALPHA) image = 'docs'
- return base + image + '.jpg'
- return 'https://github.com/' + SOCIAL.github + '/' + (repo || '') + (filepath ? '/blob/' + (branch || 'master') + '/' + filepath : '' );
- }

View File

@ -1,5 +1,13 @@
//- 💫 MIXINS > BASE
//- Section
id - [string] anchor assigned to section (used for breadcrumb navigation)
mixin section(id)
section.o-section(id="section-" + id data-section=id)
block
//- Aside wrapper
label - [string] aside label
@ -11,34 +19,26 @@ mixin aside-wrapper(label)
block
//- Date
input - [string] date in the format YYYY-MM-DD
mixin date(input)
- var date = new Date(input)
- var months = [ 'January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December' ]
time(datetime=JSON.parse(JSON.stringify(date)))&attributes(attributes)=months[date.getMonth()] + ' ' + date.getDate() + ', ' + date.getFullYear()
//- SVG from map
file - [string] SVG file name in /assets/img/
//- SVG from map (uses embedded SVG sprite)
name - [string] SVG symbol id
width - [integer] width in px
height - [integer] height in px (default: same as width)
mixin svg(file, name, width, height)
mixin svg(name, width, height)
svg(aria-hidden="true" viewBox="0 0 #{width} #{height || width}" width=width height=(height || width))&attributes(attributes)
use(xlink:href="/assets/img/#{file}.svg##{name}")
use(xlink:href="#svg_#{name}")
//- Icon
name - [string] icon name, should be SVG symbol ID
size - [integer] icon width and height (default: 20)
name - [string] icon name (will be used as symbol id: #svg_{name})
width - [integer] icon width (default: 20)
height - [integer] icon height (defaults to width)
mixin icon(name, size)
- var size = size || 20
+svg("icons", name, size).o-icon(style="min-width: #{size}px")&attributes(attributes)
mixin icon(name, width, height)
- var width = width || 20
- var height = height || width
+svg(name, width, height).o-icon(style="min-width: #{width}px")&attributes(attributes)
//- Pro/Con/Neutral icon
@ -46,8 +46,8 @@ mixin icon(name, size)
size - [integer] icon size (optional)
mixin procon(icon, size)
- colors = { pro: "green", con: "red", neutral: "yellow" }
+icon(icon, size)(class="u-color-#{colors[icon] || 'subtle'}" aria-label=icon)&attributes(attributes)
- colors = { pro: "green", con: "red", neutral: "subtle" }
+icon("circle", size || 16)(class="u-color-#{colors[icon] || 'subtle'}" aria-label=icon)&attributes(attributes)
//- Headlines Helper Mixin
@ -80,8 +80,7 @@ mixin headline(level)
mixin permalink(id)
if id
a.u-permalink(id=id href="##{id}")
+icon("anchor").u-permalink__icon
a.u-permalink(href="##{id}")
block
else
@ -109,7 +108,7 @@ mixin quickstart(groups, headline, description, hide_results)
.c-quickstart__fields
for option in group.options
input.c-quickstart__input(class="c-quickstart__input--" + (group.input_style ? group.input_style : group.multiple ? "check" : "radio") type=group.multiple ? "checkbox" : "radio" name=group.id id="qs-#{option.id}" value=option.id checked=option.checked)
label.c-quickstart__label(for="qs-#{option.id}")!=option.title
label.c-quickstart__label.u-text-tiny(for="qs-#{option.id}")!=option.title
if option.meta
| #[span.c-quickstart__label__meta (#{option.meta})]
if option.help
@ -122,12 +121,10 @@ mixin quickstart(groups, headline, description, hide_results)
code.c-code-block__content.c-quickstart__code(data-qs-results="")
block
.c-quickstart__info.u-text-tiny.o-block.u-text-right
| Like this widget? Check out #[+a("https://github.com/ines/quickstart").u-link quickstart.js]!
//- Quickstart code item
data [object] - Rendering conditions (keyed by option group ID, value: option)
data - [object] Rendering conditions (keyed by option group ID, value: option)
style - [string] modifier ID for line style
mixin qs(data, style)
- args = {}
@ -148,6 +145,13 @@ mixin terminal(label)
+code.x-terminal__code
block
//- Chart.js
id - [string] chart ID, will be assigned as #chart_{id}
mixin chart(id)
figure.o-block&attributes(attributes)
canvas(id="chart_#{id}" width="800" height="400" style="max-width: 100%")
//- Gitter chat button and widget
button - [string] text shown on button
@ -156,26 +160,24 @@ mixin terminal(label)
mixin gitter(button, label)
aside.js-gitter.c-chat.is-collapsed(data-title=(label || button))
button.js-gitter-button.c-chat__button.u-text-small
+icon("chat").o-icon--inline
button.js-gitter-button.c-chat__button.u-text-tag
+icon("chat", 16).o-icon--inline
!=button
//- Badge
name - [string] "pipy" or "conda"
image - [string] path to badge image
url - [string] badge link
mixin badge(name)
- site = BADGES[name]
if site
+a(site.link).u-padding-small
img(src=site.badge alt="{name} version" height="20")
mixin badge(image, url)
+a(url).u-padding-small.u-hide-link&attributes(attributes)
img.o-badge(src=image alt=url height="20")
//- Logo
//- spaCy logo
mixin logo()
+svg("graphics", "spacy", 675, 215).o-logo&attributes(attributes)
+svg("spacy", 675, 215).o-logo&attributes(attributes)
//- Landing
@ -186,18 +188,56 @@ mixin landing-header()
.c-landing__content
block
mixin landing-banner(headline, label)
.c-landing__banner.u-padding.o-block.u-color-light
+grid.c-landing__banner__content.o-no-block
+grid-col("third")
h3.u-heading.u-heading-1
if label
div
span.u-text-label.u-text-label--light=label
!=headline
mixin landing-badge(url, graphic, alt, size)
+a(url)(aria-label=alt title=alt).c-landing__badge
+svg("graphics", graphic, size || 225)
+grid-col("two-thirds").c-landing__banner__text
block
mixin landing-logos(title, logos)
.o-content.u-text-center&attributes(attributes)
h3.u-heading.u-text-label.u-color-dark=title
each row, i in logos
- var is_last = i == logos.length - 1
+grid("center").o-inline-list.o-no-block(class=is_last ? "o-no-block" : null)
each details, name in row
+a(details[0]).u-padding-medium
+icon(name, details[1], details[2])
if is_last
block
//- Under construction (temporary)
Marks sections that still need to be completed for the v2.0 release.
mixin under-construction()
+infobox("🚧 Under construction")
+infobox("Under construction", "🚧")
| This section is still being written and will be updated for the v2.0
| release. Is there anything that you think should definitely be mentioned or
| explained here? Any examples you'd like to see? #[strong Let us know]
| on the #[+a(gh("spacy") + "/issues/1105") v2.0 alpha thread] on GitHub!
//- Alpha infobox (temporary)
Added in the templates to notify users that they're visiting the alpha site.
mixin alpha-info()
+infobox("You are viewing the spaCy v2.0.0 alpha docs", "⚠️")
strong This page is part of the alpha documentation for spaCy v2.0.
| It does not reflect the state of the latest stable release.
| Because v2.0 is still under development, the implementation
| may differ from the intended state described here. See the
| #[+a(gh("spaCy") + "/releases/tag/v2.0.0-alpha") release notes]
| for details on how to install and test the new version. To
| read the official docs for spaCy v1.x,
| #[+a("https://spacy.io/docs") go here].

View File

@ -8,11 +8,15 @@ include _mixins-base
level - [integer] headline level, corresponds to h1, h2, h3 etc.
id - [string] unique identifier, creates permalink (optional)
mixin h(level, id)
+headline(level).u-heading&attributes(attributes)
mixin h(level, id, source)
+headline(level).u-heading(id=id)&attributes(attributes)
+permalink(id)
block
if source
+button(gh("spacy", source), false, "secondary", "small").u-nowrap.u-float-right
span Source #[+icon("code", 14).o-icon--inline]
//- External links
url - [string] link href
@ -38,21 +42,23 @@ mixin src(url)
//- API link (with added tag and automatically generated path)
path - [string] path to API docs page relative to /docs/api/
path - [string] path to API docs page relative to /api/
mixin api(path)
+a("/docs/api/" + path, true)(target="_self").u-no-border.u-inline-block.u-nowrap
- path = convertAPIPath(path)
+a("/api/" + path, true)(target="_self").u-no-border.u-inline-block.u-nowrap
block
| #[+icon("book", 18).o-icon--inline.u-color-theme]
| #[+icon("book", 16).o-icon--inline.u-color-theme]
//- Help icon with tooltip
tooltip - [string] Tooltip text
tooltip - [string] Tooltip text
icon_size - [integer] Optional size of help icon in px.
mixin help(tooltip)
mixin help(tooltip, icon_size)
span(data-tooltip=tooltip)&attributes(attributes)
+icon("help", 16).i-icon--inline
+icon("help", icon_size || 16).o-icon--inline
//- Aside for text
@ -68,24 +74,43 @@ mixin aside(label)
label - [string] aside title (optional or false for no label)
language - [string] language for syntax highlighting (default: "python")
supports basic relevant languages available for PrismJS
prompt - [string] prompt displayed before first line, e.g. "$"
mixin aside-code(label, language)
mixin aside-code(label, language, prompt)
+aside-wrapper(label)
+code(false, language).o-no-block
+code(false, language, prompt).o-no-block
block
//- Infobox
label - [string] infobox title (optional or false for no title)
emoji - [string] optional emoji displayed before the title, necessary as
argument to be able to wrap it for spacing
mixin infobox(label)
mixin infobox(label, emoji)
aside.o-box.o-block.u-text-small
if label
h3.u-text-label.u-color-theme=label
h3.u-heading.u-text-label.u-color-theme
if emoji
span.o-emoji=emoji
| #{label}
block
//- Logos displayed in the top corner of some infoboxes
logos - [array] List of icon ID, width, height and link.
mixin infobox-logos(...logos)
.o-box__logos.u-text-right.u-float-right
for logo in logos
if logo[3]
| #[+a(logo[3]).u-inline-block.u-hide-link.u-padding-small #[+icon(logo[0], logo[1], logo[2]).u-color-dark]]
else
| #[+icon(logo[0], logo[1], logo[2]).u-color-dark]
//- Link button
url - [string] link href
trusted - [boolean] if not set / false, rel="noopener nofollow" is added
@ -94,7 +119,7 @@ mixin infobox(label)
see assets/css/_components/_buttons.sass
mixin button(url, trusted, ...style)
- external = url.includes("http")
- external = url && url.includes("http")
a.c-button.u-text-label(href=url class=prefixArgs(style, "c-button") role="button" target=external ? "_blank" : null rel=external && !trusted ? "noopener nofollow" : null)&attributes(attributes)
block
@ -103,31 +128,33 @@ mixin button(url, trusted, ...style)
label - [string] aside title (optional or false for no label)
language - [string] language for syntax highlighting (default: "python")
supports basic relevant languages available for PrismJS
prompt - [string] prompt or icon to display next to code block, (mostly used for old/new)
prompt - [string] prompt displayed before first line, e.g. "$"
height - [integer] optional height to clip code block to
icon - [string] icon displayed next to code block (e.g. "accept" for new code)
wrap - [boolean] wrap text and disable horizontal scrolling
mixin code(label, language, prompt, height)
mixin code(label, language, prompt, height, icon, wrap)
pre.c-code-block.o-block(class="lang-#{(language || DEFAULT_SYNTAX)}" class=icon ? "c-code-block--has-icon" : null style=height ? "height: #{height}px" : null)&attributes(attributes)
if label
h4.u-text-label.u-text-label--dark=label
- var icon = (prompt == 'accept' || prompt == 'reject')
- var icon = icon || (prompt == 'accept' || prompt == 'reject')
if icon
- var classes = {'accept': 'u-color-green', 'reject': 'u-color-red'}
.c-code-block__icon(class=classes[icon] || null class=classes[icon] ? "c-code-block__icon--border" : null)
+icon(icon, 18)
code.c-code-block__content(data-prompt=icon ? null : prompt)
code.c-code-block__content(class=wrap ? "u-wrap" : null data-prompt=icon ? null : prompt)
block
//- Code blocks to display old/new versions
mixin code-old()
+code(false, false, "reject").o-block-small
+code(false, false, false, false, "reject").o-block-small
block
mixin code-new()
+code(false, false, "accept").o-block-small
+code(false, false, false, false, "accept").o-block-small
block
@ -138,12 +165,33 @@ mixin code-new()
mixin codepen(slug, height, default_tab)
figure.o-block(style="min-height: #{height}px")&attributes(attributes)
.codepen(data-height=height data-theme-id="26467" data-slug-hash=slug data-default-tab=(default_tab || "result") data-embed-version="2" data-user=SOCIAL.codepen)
.codepen(data-height=height data-theme-id="31335" data-slug-hash=slug data-default-tab=(default_tab || "result") data-embed-version="2" data-user=SOCIAL.codepen)
+a("https://codepen.io/" + SOCIAL.codepen + "/" + slug) View on CodePen
script(async src="https://assets.codepen.io/assets/embed/ei.js")
//- GitHub embed
repo - [string] repository owned by explosion organization
file - [string] logical path to file, relative to repository root
alt_file - [string] alternative file path used in footer and link button
height - [integer] height of code preview in px
mixin github(repo, file, alt_file, height)
- var branch = ALPHA ? "develop" : "master"
- var height = height || 250
figure.o-block
pre.c-code-block.o-block-small(class="lang-#{(language || DEFAULT_SYNTAX)}" style="height: #{height}px; min-height: #{height}px")
code.c-code-block__content(data-gh-embed="#{repo}/#{branch}/#{file}")
footer.o-grid.u-text
.o-block-small.u-flex-full #[+icon("github")] #[code=repo + '/' + (alt_file || file)]
div
+button(gh(repo, alt_file || file), false, "primary", "small") View on GitHub
//- Images / figures
url - [string] url or path to image
width - [integer] image width in px, for better rendering (default: 500)
@ -168,10 +216,26 @@ mixin image-caption()
block
//- Label
//- Graphic or illustration with button
original - [string] Path to original image
mixin graphic(original)
+image
block
if original
.u-text-right
+button(original, false, "secondary", "small") View large graphic
//- Labels
mixin label()
.u-text-label.u-color-subtle&attributes(attributes)
.u-text-label.u-color-dark&attributes(attributes)
block
mixin label-inline()
strong.u-text-label.u-color-dark&attributes(attributes)
block
@ -188,8 +252,10 @@ mixin tag()
mixin tag-model(...capabs)
- var intro = "To use this functionality, spaCy needs a model to be installed"
- var ext = capabs.length ? " that supports the following capabilities: " + capabs.join(', ') : ""
+tag Requires model
+help(intro + ext + ".").u-color-theme
span.u-nowrap
+tag Needs model
+help(intro + ext + ".").u-color-theme
//- "New" tag to label features new in a specific version
@ -219,15 +285,9 @@ mixin list(type, start)
//- List item (only used within +list)
mixin item(procon)
if procon
li&attributes(attributes)
+procon(procon).c-list__icon
block
else
li.c-list__item&attributes(attributes)
block
mixin item()
li.c-list__item&attributes(attributes)
block
//- Table
@ -237,9 +297,9 @@ mixin table(head)
table.c-table.o-block&attributes(attributes)
if head
+row
+row("head")
each column in head
th.c-table__head-cell.u-text-label=column
+head-cell=column
block
@ -251,10 +311,11 @@ mixin row(...style)
block
//- Footer table row (only used within +table)
mixin footrow()
tr.c-table__row.c-table__row--foot&attributes(attributes)
//- Header table cell (only used within +row)
mixin head-cell()
th.c-table__head-cell.u-text-label&attributes(attributes)
block
@ -284,71 +345,58 @@ mixin grid-col(width)
//- Card (only used within +grid)
title - [string] card title
details - [object] url, image, author, description, tags etc.
(see /docs/usage/_data.json)
title - [string] card title
url - [string] link for card
author - [string] optional author, displayed as byline at the bottom
icon - [string] optional ID of icon displayed with card
width - [string] optional width of grid column, defaults to "half"
mixin card(title, details)
+grid-col("half").o-card.u-text&attributes(attributes)
if details.image
+a(details.url).o-block-small
img(src=details.image alt=title width="300" role="presentation")
if title
+a(details.url)
+h(3)=title
if details.author
.u-text-small.u-color-subtle by #{details.author}
if details.description || details.tags
ul
if details.description
li=details.description
if details.tags
li
each tag in details.tags
span.u-text-tag #{tag}
| &nbsp;
block
mixin card(title, url, author, icon, width)
+grid-col(width || "half").o-box.o-grid.o-grid--space.u-text&attributes(attributes)
+a(url)
h4.u-heading.u-text-label
if icon
+icon(icon, 25).u-float-right
if title
span.u-color-dark=title
.o-block-small.u-text-small
block
if author
.u-color-subtle.u-text-tiny by #{author}
//- Simpler card list item (only used within +list)
title - [string] card title
details - [object] url, image, author, description, tags etc.
(see /docs/usage/_data.json)
//- Table of contents, to be used with +item mixins for links
col - [string] width of column (see +grid-col)
mixin card-item(title, details)
+item&attributes(attributes)
+a(details.url)=title
if details.description
br
span=details.description
if details.author
br
span.u-text-small.u-color-subtle by #{details.author}
mixin table-of-contents(col)
+grid-col(col || "half")
+infobox
+label.o-block-small Table of contents
+list("numbers").u-text-small.o-no-block
block
//- Table row for models table
//- Bibliography
id - [string] ID of bibliography component, for anchor links. Can be used if
there's more than one bibliography on one page.
mixin model-row(name, lang, procon, size, license, default_model, divider)
- var licenses = { "CC BY-SA": "https://creativecommons.org/licenses/by-sa/3.0/", "CC BY-NC": "https://creativecommons.org/licenses/by-nc/3.0/" }
mixin bibliography(id)
section(id=id || "bibliography")
+infobox
+label.o-block-small Bibliography
+list("numbers").u-text-small.o-no-block
block
+row(divider ? "divider": null)
+cell #[code=name]
if default_model
| #[span.u-color-theme(title="default model") #[+icon("star", 16)]]
+cell=lang
each icon in procon
+cell.u-text-center #[+procon(icon ? "pro" : "con")]
+cell.u-text-right=size
+cell
if license in licenses
+a(licenses[license])=license
//- Footnote
id - [string / integer] ID of footnote.
bib_id - [string] ID of bibliography component, defaults to "bibliography".
tooltip - [string] optional text displayed as tooltip
mixin fn(id, bib_id, tooltip)
sup.u-padding-small(id="bib" + id data-tooltip=tooltip)
span.u-text-tag
+a("#" + (bib_id || "bibliography")).u-hide-link #{id}
//- Table rows for annotation specs
@ -383,14 +431,3 @@ mixin annotation-row(annots, style)
else
+cell=cell
block
//- Table of contents, to be used with +item mixins for links
col - [string] width of column (see +grid-col)
mixin table-of-contents(col)
+grid-col(col || "half")
+infobox
+label.o-block-small Table of contents
+list("numbers").u-text-small.o-no-block
block

View File

@ -1,19 +1,15 @@
//- 💫 INCLUDES > TOP NAVIGATION
include _mixins
nav.c-nav.u-text.js-nav(class=landing ? "c-nav--theme" : null)
a(href='/') #[+logo]
if SUBSECTION != "index"
.u-text-label.u-padding-small.u-hidden-xs=SUBSECTION
a(href="/" aria-label=SITENAME) #[+logo]
ul.c-nav__menu
- var NAV = ALPHA ? { "Usage": "/docs/usage", "Reference": "/docs/api" } : NAVIGATION
each url, item in NAV
li.c-nav__menu__item(class=(url == "/") ? "u-hidden-xs" : null)
- var current_url = '/' + current.path[0]
each url, item in NAVIGATION
li.c-nav__menu__item(class=(current_url == url) ? "is-active" : null)
+a(url)=item
li.c-nav__menu__item
+a(gh("spaCy"))(aria-label="GitHub").u-hidden-xs #[+icon("github", 20)]
li.c-nav__menu__item.u-hidden-xs
+a(gh("spaCy"))(aria-label="GitHub") #[+icon("github", 20)]
progress.c-progress.js-progress(value="0" max="1")

View File

@ -1,6 +1,6 @@
//- 💫 INCLUDES > NEWSLETTER
ul.o-block
ul.o-block-small
li.u-text-label.u-color-subtle Stay in the loop!
li Receive updates about new releases, tutorials and more.
@ -10,7 +10,6 @@ form.o-grid#mc-embedded-subscribe-form(action="//#{MAILCHIMP.user}.list-manage.c
div(style="position: absolute; left: -5000px;" aria-hidden="true")
input(type="text" name="b_#{MAILCHIMP.id}_#{MAILCHIMP.list}" tabindex="-1" value="")
.o-grid-col.u-border.u-padding-small
input#mce-EMAIL.u-text(type="email" name="EMAIL" placeholder="Your email")
button#mc-embedded-subscribe.u-text-label.u-color-theme(type="submit" name="subscribe") Sign up
.o-grid-col.o-grid.o-grid--nowrap.o-field.u-padding-small
input#mce-EMAIL.o-field__input.u-text(type="email" name="EMAIL" placeholder="Your email" aria-label="Your email")
button#mc-embedded-subscribe.o-field__button.u-text-label.u-color-theme.u-nowrap(type="submit" name="subscribe") Sign up

View File

@ -1,47 +1,56 @@
//- 💫 INCLUDES > DOCS PAGE TEMPLATE
- sidebar_content = (SUBSECTION != "index") ? public.docs[SUBSECTION]._data.sidebar : public.docs._data.sidebar || FOOTER
- sidebar_content = (public[SECTION] ? public[SECTION]._data.sidebar : public._data[SECTION] ? public._data[SECTION].sidebar : false) || FOOTER
include _sidebar
main.o-main.o-main--sidebar.o-main--aside
article.o-content
+grid.o-no-block
+grid-col(source ? "two-thirds" : "full")
+h(1)=title
if tag
+tag=tag
+h(1).u-heading--title=title.replace("'", "")
if tag
+tag=tag
if tag_new
+tag-new(tag_new)
if teaser
.u-heading__teaser.u-text-small.u-color-dark=teaser
else if IS_MODELS
.u-heading__teaser.u-text-small.u-color-dark
| Available statistical models for
| #[code=current.source] (#{LANGUAGES[current.source]}).
if source
+grid-col("third").u-text-right
.o-inline-list
+button(gh("spacy", source), false, "secondary").u-text-tag Source #[+icon("code", 14)]
.o-block.u-text-right
+button(gh("spacy", source), false, "secondary", "small").u-nowrap
| Source #[+icon("code", 14)]
//-if ALPHA
//- +alpha-info
if ALPHA
+infobox("⚠️ You are viewing the spaCy v2.0.0 alpha docs")
strong This page is part of the alpha documentation for spaCy v2.0.
| It does not reflect the state of the latest stable release.
| Because v2.0 is still under development, the implementation
| may differ from the intended state described here. See the
| #[+a(gh("spaCy") + "/releases/tag/v2.0.0-alpha") release notes]
| for details on how to install and test the new version. To
| read the official docs for spaCy v1.x,
| #[+a("https://spacy.io/docs") go here].
!=yield
if IS_MODELS
include _page_models
else
!=yield
+grid.o-content.u-text
+grid-col("half")
if next && public.docs[SUBSECTION]._data[next]
- data = public.docs[SUBSECTION]._data[next]
if !IS_MODELS
.o-inline-list
span #[strong.u-text-label Read next:] #[+a(next).u-link=data.title]
+button(gh("spacy", "website/" + current.path.join('/') + ".jade"), false, "secondary", "small")
| #[span.o-icon Suggest edits] #[+icon("code", 14)]
+grid-col("half").u-text-right
.o-inline-list
+button(gh("spacy", "website/" + current.path.join('/') + ".jade"), false, "secondary").u-text-tag Suggest edits #[+icon("code", 14)]
if next && public[SECTION]._data[next]
- data = public[SECTION]._data[next]
+grid("vcenter")
+a(next).u-text-small.u-flex-full
h4.u-text-label.u-color-dark Read next
| #{data.title}
+a(next).c-icon-button.c-icon-button--right(aria-hidden="true")
+icon("arrow-right", 24)
+gitter("spaCy chat")

View File

@ -0,0 +1,77 @@
//- 💫 INCLUDES > MODELS PAGE TEMPLATE
for id in CURRENT_MODELS
+section(id)
+grid("vcenter").o-no-block(id=id)
+grid-col("two-thirds")
+h(2)
+a("#" + id).u-permalink=id
+grid-col("third").u-text-right
.u-color-subtle.u-text-tiny
+button(gh("spacy-models") + "/releases", true, "secondary", "small")(data-tpl=id data-tpl-key="download")
| Release details
.u-padding-small Latest: #[code(data-tpl=id data-tpl-key="version") n/a]
+aside-code("Installation", "bash", "$").
spacy download #{id}
- var comps = getModelComponents(id)
p(data-tpl=id data-tpl-key="description")
div(data-tpl=id data-tpl-key="error" style="display: none")
+infobox
| Unable to load model details from GitHub. To find out more
| about this model, see the overview of the
| #[+a(gh("spacy-models") + "/releases") latest model releases].
+table(data-tpl=id data-tpl-key="table")
+row
+cell #[+label Language]
+cell #[+tag=comps.lang] #{LANGUAGES[comps.lang]}
for comp, label in {"Type": comps.type, "Genre": comps.genre}
+row
+cell #[+label=label]
+cell #[+tag=comp] #{MODEL_META[comp]}
+row
+cell #[+label Size]
+cell #[+tag=comps.size] #[span(data-tpl=id data-tpl-key="size") #[em n/a]]
each label in ["Pipeline", "Sources", "Author", "License"]
- var field = label.toLowerCase()
+row
+cell.u-nowrap
+label=label
if MODEL_META[field]
| #[+help(MODEL_META[field]).u-color-subtle]
+cell
span(data-tpl=id data-tpl-key=field) #[em n/a]
+row(data-tpl=id data-tpl-key="compat-wrapper" style="display: none")
+cell
+label Compat #[+help("Latest compatible model version for your spaCy installation").u-color-subtle]
+cell
.o-field.u-float-left
select.o-field__select.u-text-small(data-tpl=id data-tpl-key="compat")
.o-empty(data-tpl=id data-tpl-key="compat-versions") &nbsp;
section(data-tpl=id data-tpl-key="accuracy-wrapper" style="display: none")
+grid.o-no-block
+grid-col("third")
+h(4) Accuracy
+table.o-block-small
for label, field in MODEL_ACCURACY
+row(style="display: none")
+cell.u-nowrap
+label=label
if MODEL_META[field]
| #[+help(MODEL_META[field]).u-color-subtle]
+cell.u-text-right(data-tpl=id data-tpl-key=field)
| n/a
+grid-col("two-thirds")
+h(4) Comparison
+chart(id).u-padding-small
p.u-text-small.u-color-dark(data-tpl=id data-tpl-key="notes")

View File

@ -1,27 +1,46 @@
//- 💫 INCLUDES > SCRIPTS
script(src="/assets/js/main.js?v#{V_JS}")
script(src="/assets/js/prism.js")
if quickstart
script(src="/assets/js/quickstart.min.js")
if SECTION == "docs"
if quickstart
script(src="/assets/js/quickstart.js")
script var qs = new Quickstart("#qs")
if IS_PAGE
script(src="/assets/js/in-view.min.js")
script.
((window.gitter = {}).chat = {}).options = {
useStyles: false,
activationElement: '.js-gitter-button',
targetElement: '.js-gitter',
room: '!{SOCIAL.gitter}'
};
script(src="https://sidecar.gitter.im/dist/sidecar.v1.js" async defer)
if HAS_MODELS
script(src="/assets/js/chart.min.js")
if environment == "deploy"
script
script(async src="https://www.google-analytics.com/analytics.js")
script(src="/assets/js/prism.min.js")
script(src="/assets/js/main.js?v#{V_JS}")
script
| new ProgressBar('.js-progress');
if changelog
| new Changelog('!{SOCIAL.github}', 'spacy');
if quickstart
| new Quickstart("#qs");
if IS_PAGE
| new SectionHighlighter('data-section', 'data-nav');
| new GitHubEmbed('!{SOCIAL.github}', 'data-gh-embed');
| ((window.gitter = {}).chat = {}).options = {
| useStyles: false,
| activationElement: '.js-gitter-button',
| targetElement: '.js-gitter',
| room: '!{SOCIAL.gitter}'
| };
if HAS_MODELS
| new ModelLoader('!{MODELS_REPO}', !{JSON.stringify(CURRENT_MODELS)}, !{JSON.stringify(MODEL_LICENSES)}, !{JSON.stringify(MODEL_ACCURACY)});
if environment == "deploy"
| window.ga=window.ga||function(){
| (ga.q=ga.q||[]).push(arguments)}; ga.l=+new Date;
| ga('create', '#{ANALYTICS}', 'auto'); ga('send', 'pageview');
script(async src="https://www.google-analytics.com/analytics.js")
if IS_PAGE
script(src="https://sidecar.gitter.im/dist/sidecar.v1.js" async defer)

View File

@ -1,13 +1,23 @@
//- 💫 INCLUDES > SIDEBAR
include _mixins
menu.c-sidebar.js-sidebar.u-text
if sidebar_content
each items, menu in sidebar_content
ul.c-sidebar__section.o-block
li.u-text-label.u-color-subtle=menu
each items, sectiontitle in sidebar_content
ul.c-sidebar__section.o-block-small
li.u-text-label.u-color-dark=sectiontitle
each url, item in items
li(class=(CURRENT == url || (CURRENT == "index" && url == "./")) ? "is-active" : null)
+a(url)=item
- var is_current = CURRENT == url || (CURRENT == "index" && url == "./")
li.c-sidebar__item
+a(url)(class=is_current ? "is-active" : null)=item
if is_current
if IS_MODELS && CURRENT_MODELS.length
- menu = Object.assign({}, ...CURRENT_MODELS.map(id => ({ [id]: id })))
if menu
ul.c-sidebar__crumb.u-hidden-sm
- var counter = 0
for id, title in menu
- counter++
li.c-sidebar__crumb__item(data-nav=id class=(counter == 1) ? "is-active" : null)
+a("#section-" + id)=title

157
website/_includes/_svg.jade Normal file

File diff suppressed because one or more lines are too long

View File

@ -2,11 +2,16 @@
include _includes/_mixins
- title = IS_MODELS ? LANGUAGES[current.source] || title : title
- social_title = (SECTION == "index") ? SITENAME + " - " + SLOGAN : title + " - " + SITENAME
- social_img = SITE_URL + "/assets/img/social/preview_" + (preview || ALPHA ? "alpha" : "default") + ".jpg"
doctype html
html(lang="en")
title
if SECTION == "docs" && SUBSECTION && SUBSECTION != "index"
| #{title} | #{SITENAME} #{SUBSECTION == "api" ? "API" : "Usage"} Documentation
if SECTION == "api" || SECTION == "usage" || SECTION == "models"
- var title_section = (SECTION == "api") ? "API" : SECTION.charAt(0).toUpperCase() + SECTION.slice(1)
| #{title} | #{SITENAME} #{title_section} Documentation
else if SECTION != "index"
| #{title} | #{SITENAME}
@ -22,32 +27,30 @@ html(lang="en")
meta(property="og:type" content="website")
meta(property="og:site_name" content=sitename)
meta(property="og:url" content="#{SITE_URL}/#{current.path.join('/')}")
meta(property="og:title" content="#{title} - spaCy")
meta(property="og:title" content=social_title)
meta(property="og:description" content=description)
meta(property="og:image" content=getSocialImg())
meta(property="og:image" content=social_img)
meta(name="twitter:card" content="summary_large_image")
meta(name="twitter:site" content="@" + SOCIAL.twitter)
meta(name="twitter:title" content="#{title} - spaCy")
meta(name="twitter:title" content=social_title)
meta(name="twitter:description" content=description)
meta(name="twitter:image" content=getSocialImg())
meta(name="twitter:image" content=social_img)
link(rel="shortcut icon" href="/assets/img/favicon.ico")
link(rel="icon" type="image/x-icon" href="/assets/img/favicon.ico")
if ALPHA && SECTION == "docs"
if SECTION == "api"
link(href="/assets/css/style_green.css?v#{V_CSS}" rel="stylesheet")
else if SUBSECTION == "usage"
link(href="/assets/css/style_red.css?v#{V_CSS}" rel="stylesheet")
else
link(href="/assets/css/style.css?v#{V_CSS}" rel="stylesheet")
body
include _includes/_svg
include _includes/_navigation
if SECTION == "docs"
if !landing
include _includes/_page-docs
else

View File

@ -0,0 +1,43 @@
//- 💫 DOCS > API > ANNOTATION > BILUO
+table([ "Tag", "Description" ])
+row
+cell #[code #[span.u-color-theme B] EGIN]
+cell The first token of a multi-token entity.
+row
+cell #[code #[span.u-color-theme I] N]
+cell An inner token of a multi-token entity.
+row
+cell #[code #[span.u-color-theme L] AST]
+cell The final token of a multi-token entity.
+row
+cell #[code #[span.u-color-theme U] NIT]
+cell A single-token entity.
+row
+cell #[code #[span.u-color-theme O] UT]
+cell A non-entity token.
+aside("Why BILUO, not IOB?")
| There are several coding schemes for encoding entity annotations as
| token tags. These coding schemes are equally expressive, but not
| necessarily equally learnable.
| #[+a("http://www.aclweb.org/anthology/W09-1119") Ratinov and Roth]
| showed that the minimal #[strong Begin], #[strong In], #[strong Out]
| scheme was more difficult to learn than the #[strong BILUO] scheme that
| we use, which explicitly marks boundary tokens.
p
| spaCy translates the character offsets into this scheme, in order to
| decide the cost of each action given the current state of the entity
| recogniser. The costs are then used to calculate the gradient of the
| loss, to train the model. The exact algorithm is a pastiche of
| well-known methods, and is not currently described in any single
| publication. The model is a greedy transition-based parser guided by a
| linear model whose weights are learned using the averaged perceptron
| loss, via the #[+a("http://www.aclweb.org/anthology/C12-1059") dynamic oracle]
| imitation learning strategy. The transition system is equivalent to the
    | BILUO tagging scheme.
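p
    | As a small, hypothetical illustration (the tokens below are hand-made,
    | not spaCy output), the entity "Los Angeles" in the phrase
    | "show flights to Los Angeles" would be encoded like this:

+code.
    tokens = ['show', 'flights', 'to', 'Los',   'Angeles']
    biluo  = ['O',    'O',       'O',  'B-GPE', 'L-GPE']  # multi-token entity
    # a single-token entity such as 'London' would instead be tagged 'U-GPE'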

@ -0,0 +1,115 @@
//- 💫 DOCS > API > ARCHITECTURE > CYTHON
+aside("What's Cython?")
| #[+a("http://cython.org/") Cython] is a language for writing
| C extensions for Python. Most Python code is also valid Cython, but
| you can add type declarations to get efficient memory-managed code
| just like C or C++.
p
| spaCy's core data structures are implemented as
| #[+a("http://cython.org/") Cython] #[code cdef] classes. Memory is
| managed through the #[+a(gh("cymem")) #[code cymem]]
| #[code cymem.Pool] class, which allows you
| to allocate memory which will be freed when the #[code Pool] object
| is garbage collected. This means you usually don't have to worry
| about freeing memory. You just have to decide which Python object
| owns the memory, and make it own the #[code Pool]. When that object
| goes out of scope, the memory will be freed. You do have to take
| care that no pointers outlive the object that owns them — but this
| is generally quite easy.
p
| All Cython modules should have the #[code # cython: infer_types=True]
| compiler directive at the top of the file. This makes the code much
| cleaner, as it avoids the need for many type declarations. If
| possible, you should prefer to declare your functions #[code nogil],
| even if you don't especially care about multi-threading. The reason
| is that #[code nogil] functions help the Cython compiler reason about
| your code quite a lot — you're telling the compiler that no Python
| dynamics are possible. This lets many errors be raised, and ensures
| your function will run at C speed.
p
| Cython gives you many choices of sequences: you could have a Python
| list, a numpy array, a memory view, a C++ vector, or a pointer.
| Pointers are preferred, because they are fastest, have the most
| explicit semantics, and let the compiler check your code more
| strictly. C++ vectors are also great — but you should only use them
| internally in functions. It's less friendly to accept a vector as an
| argument, because that asks the user to do much more work. Here's
| how to get a pointer from a numpy array, memory view or vector:
+code.
    cdef void get_pointers(np.ndarray[int, mode='c'] numpy_array, vector[int] cpp_vector, int[::1] memory_view) nogil:
        pointer1 = &lt;int*&gt;numpy_array.data
        pointer2 = cpp_vector.data()
        pointer3 = &memory_view[0]
p
| Both C arrays and C++ vectors reassure the compiler that no Python
| operations are possible on your variable. This is a big advantage:
| it lets the Cython compiler raise many more errors for you.
p
| When getting a pointer from a numpy array or memoryview, take care
| that the data is actually stored in C-contiguous order — otherwise
| you'll get a pointer to nonsense. The type-declarations in the code
| above should generate runtime errors if buffers with incorrect
| memory layouts are passed in. To iterate over the array, the
| following style is preferred:
+code.
cdef int c_total(const int* int_array, int length) nogil:
total = 0
for item in int_array[:length]:
total += item
return total
p
| If this is confusing, consider that the compiler couldn't deal with
| #[code for item in int_array:] — there's no length attached to a raw
| pointer, so how could we figure out where to stop? The length is
| provided in the slice notation as a solution to this. Note that we
| don't have to declare the type of #[code item] in the code above —
| the compiler can easily infer it. This gives us tidy code that looks
| quite like Python, but is exactly as fast as C — because we've made
| sure the compilation to C is trivial.
p
| Your functions cannot be declared #[code nogil] if they need to
| create Python objects or call Python functions. This is perfectly
| okay — you shouldn't torture your code just to get #[code nogil]
| functions. However, if your function isn't #[code nogil], you should
| compile your module with #[code cython -a --cplus my_module.pyx] and
| open the resulting #[code my_module.html] file in a browser. This
| will let you see how Cython is compiling your code. Calls into the
| Python run-time will be in bright yellow. This lets you easily see
| whether Cython is able to correctly type your code, or whether there
| are unexpected problems.
p
| Working in Cython is very rewarding once you're over the initial
| learning curve. As with C and C++, the first way you write something
| in Cython will often be the performance-optimal approach. In
| contrast, Python optimisation generally requires a lot of
| experimentation. Is it faster to have an #[code if item in my_dict]
| check, or to use #[code .get()]? What about
| #[code try]/#[code except]? Does this numpy operation create a copy?
| There's no way to guess the answers to these questions, and you'll
| usually be dissatisfied with your results — so there's no way to
| know when to stop this process. In the worst case, you'll make a
| mess that invites the next reader to try their luck too. This is
| like one of those
| #[+a("http://www.wemjournal.org/article/S1080-6032%2809%2970088-2/abstract") volcanic gas-traps],
| where the rescuers keep passing out from low oxygen, causing
| another rescuer to follow — only to succumb themselves. In short,
| just say no to optimizing your Python. If it's not fast enough the
| first time, just switch to Cython.
+infobox("Resources")
+list.o-no-block
+item #[+a("http://docs.cython.org/en/latest/") Official Cython documentation] (cython.org)
+item #[+a("https://explosion.ai/blog/writing-c-in-cython", true) Writing C in Cython] (explosion.ai)
+item #[+a("https://explosion.ai/blog/multithreading-with-cython") Multi-threading spaCys parser and named entity recogniser] (explosion.ai)

@ -0,0 +1,141 @@
//- 💫 DOCS > API > ARCHITECTURE > NN MODEL ARCHITECTURE
p
| The parsing model is a blend of recent results. The two recent
    | inspirations have been the work of Eliyahu Kiperwasser and Yoav Goldberg at
| Bar Ilan#[+fn(1)], and the SyntaxNet team from Google. The foundation of
| the parser is still based on the work of Joakim Nivre#[+fn(2)], who
| introduced the transition-based framework#[+fn(3)], the arc-eager
| transition system, and the imitation learning objective. The model is
| implemented using #[+a(gh("thinc")) Thinc], spaCy's machine learning
| library. We first predict context-sensitive vectors for each word in the
| input:
+code.
(embed_lower | embed_prefix | embed_suffix | embed_shape)
        >> Maxout(token_width)
        >> convolution ** 4
p
| This convolutional layer is shared between the tagger, parser and NER,
| and will also be shared by the future neural lemmatizer. Because the
| parser shares these layers with the tagger, the parser does not require
| tag features. I got this trick from David Weiss's "Stack Combination"
| paper#[+fn(4)].
p
| To boost the representation, the tagger actually predicts a "super tag"
| with POS, morphology and dependency label#[+fn(5)]. The tagger predicts
    | these supertags by adding a softmax layer onto the convolutional layer,
    | so we're teaching the convolutional layer to give us a representation
| that's one affine transform from this informative lexical information.
| This is obviously good for the parser (which backprops to the
| convolutions too). The parser model makes a state vector by concatenating
| the vector representations for its context tokens. The current context
| tokens:
+table
+row
+cell #[code S0], #[code S1], #[code S2]
+cell Top three words on the stack.
+row
+cell #[code B0], #[code B1]
+cell First two words of the buffer.
+row
+cell.u-nowrap
| #[code S0L1], #[code S1L1], #[code S2L1], #[code B0L1],
| #[code B1L1]#[br]
| #[code S0L2], #[code S1L2], #[code S2L2], #[code B0L2],
| #[code B1L2]
+cell
| Leftmost and second leftmost children of #[code S0], #[code S1],
| #[code S2], #[code B0] and #[code B1].
+row
+cell.u-nowrap
| #[code S0R1], #[code S1R1], #[code S2R1], #[code B0R1],
| #[code B1R1]#[br]
| #[code S0R2], #[code S1R2], #[code S2R2], #[code B0R2],
| #[code B1R2]
+cell
| Rightmost and second rightmost children of #[code S0], #[code S1],
| #[code S2], #[code B0] and #[code B1].
p
| This makes the state vector quite long: #[code 13*T], where #[code T] is
| the token vector width (128 is working well). Fortunately, there's a way
| to structure the computation to save some expense (and make it more
| GPU-friendly).
p
| The parser typically visits #[code 2*N] states for a sentence of length
| #[code N] (although it may visit more, if it back-tracks with a
    | non-monotonic transition#[+fn(6)]). A naive implementation would require
| #[code 2*N (B, 13*T) @ (13*T, H)] matrix multiplications for a batch of
| size #[code B]. We can instead perform one #[code (B*N, T) @ (T, 13*H)]
| multiplication, to pre-compute the hidden weights for each positional
| feature with respect to the words in the batch. (Note that our token
| vectors come from the CNN — so we can't play this trick over the
    | vocabulary. That's how Stanford's NN parser#[+fn(7)] works — and why its
| model is so big.)
p
| This pre-computation strategy allows a nice compromise between
| GPU-friendliness and implementation simplicity. The CNN and the wide
| lower layer are computed on the GPU, and then the precomputed hidden
| weights are moved to the CPU, before we start the transition-based
| parsing process. This makes a lot of things much easier. We don't have to
| worry about variable-length batch sizes, and we don't have to implement
| the dynamic oracle in CUDA to train.
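p
    | Below is a minimal numpy sketch of the pre-computation trick. The
    | dimensions, the variable names and the use of plain numpy are
    | illustrative assumptions; the actual implementation lives in Thinc and
    | runs the large multiplication on the GPU.

+code.
    import numpy as np

    B, N, T, H = 8, 40, 128, 64           # batch size, sentence length, token width, hidden width
    tokens = np.random.randn(B * N, T)    # context-sensitive vectors from the CNN
    W = np.random.randn(T, 13 * H)        # hidden weights, one H-sized block per positional feature

    # One big, GPU-friendly multiplication per batch:
    precomputed = np.dot(tokens, W).reshape(B * N, 13, H)

    # Each parser state then just sums 13 pre-computed rows on the CPU,
    # instead of multiplying a fresh (13*T,) vector by a (13*T, H) matrix:
    def state_vector(context_token_ids):
        return sum(precomputed[tok, i] for i, tok in enumerate(context_token_ids))

    state = state_vector(np.random.randint(0, B * N, size=13))
    assert state.shape == (H,)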
p
| Currently the parser's loss function is multilabel log loss#[+fn(6)], as
| the dynamic oracle allows multiple states to be 0 cost. This is defined
| as follows, where #[code gZ] is the sum of the scores assigned to gold
| classes:
+code.
(exp(score) / Z) - (exp(score) / gZ)
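p
    | As a rough, hand-made illustration of that loss, here is a numpy sketch
    | of the gradient it implies for a single state; the scores and the set
    | of zero-cost (gold) actions are invented for the example.

+code.
    import numpy as np

    scores = np.array([2.0, 1.0, 0.5, -1.0])   # model scores for each action
    is_gold = np.array([1., 1., 0., 0.])       # the dynamic oracle can mark several actions as zero-cost

    exp = np.exp(scores)
    Z = exp.sum()                              # partition over all actions
    gZ = (exp * is_gold).sum()                 # partition over the gold actions only

    d_scores = exp / Z - (exp * is_gold) / gZ  # gradient of the multilabel log loss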
+bibliography
+item
| #[+a("https://www.semanticscholar.org/paper/Simple-and-Accurate-Dependency-Parsing-Using-Bidir-Kiperwasser-Goldberg/3cf31ecb2724b5088783d7c96a5fc0d5604cbf41") Simple and Accurate Dependency Parsing Using Bidirectional LSTM Feature Representations]
br
| Eliyahu Kiperwasser, Yoav Goldberg. (2016)
+item
| #[+a("https://www.semanticscholar.org/paper/A-Dynamic-Oracle-for-Arc-Eager-Dependency-Parsing-Goldberg-Nivre/22697256ec19ecc3e14fcfc63624a44cf9c22df4") A Dynamic Oracle for Arc-Eager Dependency Parsing]
br
| Yoav Goldberg, Joakim Nivre (2012)
+item
| #[+a("https://explosion.ai/blog/parsing-english-in-python") Parsing English in 500 Lines of Python]
br
| Matthew Honnibal (2013)
+item
| #[+a("https://www.semanticscholar.org/paper/Stack-propagation-Improved-Representation-Learning-Zhang-Weiss/0c133f79b23e8c680891d2e49a66f0e3d37f1466") Stack-propagation: Improved Representation Learning for Syntax]
br
| Yuan Zhang, David Weiss (2016)
+item
| #[+a("https://www.semanticscholar.org/paper/Deep-multi-task-learning-with-low-level-tasks-supe-S%C3%B8gaard-Goldberg/03ad06583c9721855ccd82c3d969a01360218d86") Deep multi-task learning with low level tasks supervised at lower layers]
br
| Anders Søgaard, Yoav Goldberg (2016)
+item
| #[+a("https://www.semanticscholar.org/paper/An-Improved-Non-monotonic-Transition-System-for-De-Honnibal-Johnson/4094cee47ade13b77b5ab4d2e6cb9dd2b8a2917c") An Improved Non-monotonic Transition System for Dependency Parsing]
br
| Matthew Honnibal, Mark Johnson (2015)
+item
| #[+a("http://cs.stanford.edu/people/danqi/papers/emnlp2014.pdf") A Fast and Accurate Dependency Parser using Neural Networks]
br
    | Danqi Chen, Christopher D. Manning (2014)
+item
| #[+a("https://www.semanticscholar.org/paper/Parsing-the-Wall-Street-Journal-using-a-Lexical-Fu-Riezler-King/0ad07862a91cd59b7eb5de38267e47725a62b8b2") Parsing the Wall Street Journal using a Lexical-Functional Grammar and Discriminative Estimation Techniques]
br
| Stefan Riezler et al. (2002)

@ -1,29 +1,32 @@
{
"sidebar": {
"Introduction": {
"Facts & Figures": "./",
"Languages": "language-models",
"Annotation Specs": "annotation"
"Overview": {
"Architecture": "./",
"Annotation Specs": "annotation",
"Functions": "top-level"
},
"Top-level": {
"spacy": "spacy",
"displacy": "displacy",
"Utility Functions": "util",
"Command line": "cli"
},
"Classes": {
"Containers": {
"Doc": "doc",
"Token": "token",
"Span": "span",
"Lexeme": "lexeme"
},
"Pipeline": {
"Language": "language",
"Tokenizer": "tokenizer",
"Pipe": "pipe",
"Tensorizer": "tensorizer",
"Tagger": "tagger",
"DependencyParser": "dependencyparser",
"EntityRecognizer": "entityrecognizer",
"TextCategorizer": "textcategorizer",
"Tokenizer": "tokenizer",
"Lemmatizer": "lemmatizer",
"Matcher": "matcher",
"Lexeme": "lexeme",
"PhraseMatcher": "phrasematcher"
},
"Other": {
"Vocab": "vocab",
"StringStore": "stringstore",
"Vectors": "vectors",
@ -34,52 +37,37 @@
},
"index": {
"title": "Facts & Figures",
"next": "language-models"
"title": "Architecture",
"next": "annotation",
"menu": {
"Basics": "basics",
"Neural Network Model": "nn-model",
"Cython Conventions": "cython"
}
},
"language-models": {
"title": "Languages",
"next": "philosophy"
},
"philosophy": {
"title": "Philosophy"
},
"spacy": {
"title": "spaCy top-level functions",
"source": "spacy/__init__.py",
"next": "displacy"
},
"displacy": {
"title": "displaCy",
"tag": "module",
"source": "spacy/displacy",
"next": "util"
},
"util": {
"title": "Utility Functions",
"source": "spacy/util.py",
"next": "cli"
},
"cli": {
"title": "Command Line Interface",
"source": "spacy/cli"
"top-level": {
"title": "Top-level Functions",
"menu": {
"spacy": "spacy",
"displacy": "displacy",
"Utility Functions": "util",
"Compatibility": "compat",
"Command Line": "cli"
}
},
"language": {
"title": "Language",
"tag": "class",
"teaser": "A text-processing pipeline.",
"source": "spacy/language.py"
},
"doc": {
"title": "Doc",
"tag": "class",
"teaser": "A container for accessing linguistic annotations.",
"source": "spacy/tokens/doc.pyx"
},
@ -103,6 +91,7 @@
"vocab": {
"title": "Vocab",
"teaser": "A storage class for vocabulary and other data shared across a language.",
"tag": "class",
"source": "spacy/vocab.pyx"
},
@ -115,10 +104,27 @@
"matcher": {
"title": "Matcher",
"teaser": "Match sequences of tokens, based on pattern rules.",
"tag": "class",
"source": "spacy/matcher.pyx"
},
"phrasematcher": {
"title": "PhraseMatcher",
"teaser": "Match sequences of tokens, based on documents.",
"tag": "class",
"tag_new": 2,
"source": "spacy/matcher.pyx"
},
"pipe": {
"title": "Pipe",
"teaser": "Abstract base class defining the API for pipeline components.",
"tag": "class",
"tag_new": 2,
"source": "spacy/pipeline.pyx"
},
"dependenyparser": {
"title": "DependencyParser",
"tag": "class",
@ -127,18 +133,22 @@
"entityrecognizer": {
"title": "EntityRecognizer",
"teaser": "Annotate named entities on documents.",
"tag": "class",
"source": "spacy/pipeline.pyx"
},
"textcategorizer": {
"title": "TextCategorizer",
"teaser": "Add text categorization models to spaCy pipelines.",
"tag": "class",
"tag_new": 2,
"source": "spacy/pipeline.pyx"
},
"dependencyparser": {
"title": "DependencyParser",
"teaser": "Annotate syntactic dependencies on documents.",
"tag": "class",
"source": "spacy/pipeline.pyx"
},
@ -149,15 +159,23 @@
"source": "spacy/tokenizer.pyx"
},
"lemmatizer": {
"title": "Lemmatizer",
"tag": "class"
},
"tagger": {
"title": "Tagger",
"teaser": "Annotate part-of-speech tags on documents.",
"tag": "class",
"source": "spacy/pipeline.pyx"
},
"tensorizer": {
"title": "Tensorizer",
"teaser": "Add a tensor with position-sensitive meaning representations to a document.",
"tag": "class",
"tag_new": 2,
"source": "spacy/pipeline.pyx"
},
@ -169,23 +187,38 @@
"goldcorpus": {
"title": "GoldCorpus",
"teaser": "An annotated corpus, using the JSON file format.",
"tag": "class",
"tag_new": 2,
"source": "spacy/gold.pyx"
},
"binder": {
"title": "Binder",
"tag": "class",
"tag_new": 2,
"source": "spacy/tokens/binder.pyx"
},
"vectors": {
"title": "Vectors",
"teaser": "Store, save and load word vectors.",
"tag": "class",
"tag_new": 2,
"source": "spacy/vectors.pyx"
},
"annotation": {
"title": "Annotation Specifications"
"title": "Annotation Specifications",
"teaser": "Schemes used for labels, tags and training data.",
"menu": {
"Tokenization": "tokenization",
"Sentence Boundaries": "sbd",
"POS Tagging": "pos-tagging",
"Lemmatization": "lemmatization",
"Dependencies": "dependency-parsing",
"Named Entities": "named-entities",
"Training Data": "training"
}
}
}

@ -1,26 +1,17 @@
//- 💫 DOCS > USAGE > COMMAND LINE INTERFACE
include ../../_includes/_mixins
//- 💫 DOCS > API > TOP-LEVEL > COMMAND LINE INTERFACE
p
| As of v1.7.0, spaCy comes with new command line helpers to download and
| link models and show useful debugging information. For a list of available
| commands, type #[code spacy --help].
+infobox("⚠️ Deprecation note")
| As of spaCy 2.0, the #[code model] command to initialise a model data
| directory is deprecated. The command was only necessary because previous
| versions of spaCy expected a model directory to already be set up. This
| has since been changed, so you can use the #[+api("cli#train") #[code train]]
| command straight away.
+h(2, "download") Download
+h(3, "download") Download
p
| Download #[+a("/docs/usage/models") models] for spaCy. The downloader finds the
| Download #[+a("/usage/models") models] for spaCy. The downloader finds the
| best-matching compatible version, uses pip to download the model as a
| package and automatically creates a
| #[+a("/docs/usage/models#usage") shortcut link] to load the model by name.
| #[+a("/usage/models#usage") shortcut link] to load the model by name.
| Direct downloads don't perform any compatibility checks and require the
| model name to be specified with its version (e.g., #[code en_core_web_sm-1.2.0]).
@ -49,15 +40,15 @@ p
| detailed messages in case things go wrong. It's #[strong not recommended]
| to use this command as part of an automated process. If you know which
| model your project needs, you should consider a
| #[+a("/docs/usage/models#download-pip") direct download via pip], or
| #[+a("/usage/models#download-pip") direct download via pip], or
    | uploading the model to a local PyPI installation and fetching it straight
| from there. This will also allow you to add it as a versioned package
| dependency to your project.
+h(2, "link") Link
+h(3, "link") Link
p
| Create a #[+a("/docs/usage/models#usage") shortcut link] for a model,
| Create a #[+a("/usage/models#usage") shortcut link] for a model,
| either a Python package or a local directory. This will let you load
| models from any location using a custom name via
| #[+api("spacy#load") #[code spacy.load()]].
@ -95,7 +86,7 @@ p
+cell flag
+cell Show help message and available arguments.
+h(2, "info") Info
+h(3, "info") Info
p
| Print information about your spaCy installation, models and local setup,
@ -122,15 +113,15 @@ p
+cell flag
+cell Show help message and available arguments.
+h(2, "convert") Convert
+h(3, "convert") Convert
p
| Convert files into spaCy's #[+a("/docs/api/annotation#json-input") JSON format]
| Convert files into spaCy's #[+a("/api/annotation#json-input") JSON format]
| for use with the #[code train] command and other experiment management
| functions. The right converter is chosen based on the file extension of
| the input file. Currently only supports #[code .conllu].
+code(false, "bash", "$").
+code(false, "bash", "$", false, false, true).
spacy convert [input_file] [output_dir] [--n-sents] [--morphology]
+table(["Argument", "Type", "Description"])
@ -159,14 +150,18 @@ p
+cell flag
+cell Show help message and available arguments.
+h(2, "train") Train
+h(3, "train") Train
p
| Train a model. Expects data in spaCy's
| #[+a("/docs/api/annotation#json-input") JSON format].
| #[+a("/api/annotation#json-input") JSON format]. On each epoch, a model
| will be saved out to the directory. Accuracy scores and model details
| will be added to a #[+a("/usage/training#models-generating") #[code meta.json]]
| to allow packaging the model using the
| #[+api("cli#package") #[code package]] command.
+code(false, "bash", "$").
spacy train [lang] [output_dir] [train_data] [dev_data] [--n-iter] [--n-sents] [--use-gpu] [--no-tagger] [--no-parser] [--no-entities]
+code(false, "bash", "$", false, false, true).
spacy train [lang] [output_dir] [train_data] [dev_data] [--n-iter] [--n-sents] [--use-gpu] [--meta-path] [--vectors] [--no-tagger] [--no-parser] [--no-entities] [--gold-preproc]
+table(["Argument", "Type", "Description"])
+row
@ -204,6 +199,27 @@ p
+cell option
+cell Use GPU.
+row
+cell #[code --vectors], #[code -v]
+cell option
+cell Model to load vectors from.
+row
+cell #[code --meta-path], #[code -m]
+cell option
+cell
| #[+tag-new(2)] Optional path to model
| #[+a("/usage/training#models-generating") #[code meta.json]].
| All relevant properties like #[code lang], #[code pipeline] and
| #[code spacy_version] will be overwritten.
+row
+cell #[code --version], #[code -V]
+cell option
+cell
| Model version. Will be written out to the model's
| #[code meta.json] after training.
+row
+cell #[code --no-tagger], #[code -T]
+cell flag
@ -219,12 +235,18 @@ p
+cell flag
+cell Don't train NER.
+row
+cell #[code --gold-preproc], #[code -G]
+cell flag
+cell Use gold preprocessing.
+row
+cell #[code --help], #[code -h]
+cell flag
+cell Show help message and available arguments.
+h(3, "train-hyperparams") Environment variables for hyperparameters
+h(4, "train-hyperparams") Environment variables for hyperparameters
+tag-new(2)
p
| spaCy lets you set hyperparameters for training via environment variables.
@ -236,98 +258,149 @@ p
+code(false, "bash").
parser_hidden_depth=2 parser_maxout_pieces=1 train-parser
+under-construction
+table(["Name", "Description", "Default"])
+row
+cell #[code dropout_from]
+cell
+cell Initial dropout rate.
+cell #[code 0.2]
+row
+cell #[code dropout_to]
+cell
+cell Final dropout rate.
+cell #[code 0.2]
+row
+cell #[code dropout_decay]
+cell
+cell Rate of dropout change.
+cell #[code 0.0]
+row
+cell #[code batch_from]
+cell
+cell Initial batch size.
+cell #[code 1]
+row
+cell #[code batch_to]
+cell
+cell Final batch size.
+cell #[code 64]
+row
+cell #[code batch_compound]
+cell
+cell Rate of batch size acceleration.
+cell #[code 1.001]
+row
+cell #[code token_vector_width]
+cell
+cell Width of embedding tables and convolutional layers.
+cell #[code 128]
+row
+cell #[code embed_size]
+cell
+cell Number of rows in embedding tables.
+cell #[code 7500]
+row
+cell #[code parser_maxout_pieces]
+cell
+cell Number of pieces in the parser's and NER's first maxout layer.
+cell #[code 2]
+row
+cell #[code parser_hidden_depth]
+cell
+cell Number of hidden layers in the parser and NER.
+cell #[code 1]
+row
+cell #[code hidden_width]
+cell
+cell Size of the parser's and NER's hidden layers.
+cell #[code 128]
+row
+cell #[code learn_rate]
+cell
+cell Learning rate.
+cell #[code 0.001]
+row
+cell #[code optimizer_B1]
+cell
+cell Momentum for the Adam solver.
+cell #[code 0.9]
+row
+cell #[code optimizer_B2]
+cell
+cell Adagrad-momentum for the Adam solver.
+cell #[code 0.999]
+row
+cell #[code optimizer_eps]
+cell
+cell Epsilon value for the Adam solver.
+cell #[code 1e-08]
+row
+cell #[code L2_penalty]
+cell
+cell L2 regularisation penalty.
+cell #[code 1e-06]
+row
+cell #[code grad_norm_clip]
+cell
+cell Gradient L2 norm constraint.
+cell #[code 1.0]
+h(2, "package") Package
+h(3, "evaluate") Evaluate
+tag-new(2)
p
| Generate a #[+a("/docs/usage/saving-loading#generating") model Python package]
| Evaluate a model's accuracy and speed on JSON-formatted annotated data.
| Will print the results and optionally export
| #[+a("/usage/visualizers") displaCy visualizations] of a sample set of
| parses to #[code .html] files. Visualizations for the dependency parse
| and NER will be exported as separate files if the respective component
| is present in the model's pipeline.
+code(false, "bash", "$", false, false, true).
spacy evaluate [model] [data_path] [--displacy-path] [--displacy-limit] [--gpu-id] [--gold-preproc]
+table(["Argument", "Type", "Description"])
+row
+cell #[code model]
+cell positional
+cell
| Model to evaluate. Can be a package or shortcut link name, or a
| path to a model data directory.
+row
+cell #[code data_path]
+cell positional
+cell Location of JSON-formatted evaluation data.
+row
+cell #[code --displacy-path], #[code -dp]
+cell option
+cell
| Directory to output rendered parses as HTML. If not set, no
| visualizations will be generated.
+row
+cell #[code --displacy-limit], #[code -dl]
+cell option
+cell
| Number of parses to generate per file. Defaults to #[code 25].
| Keep in mind that a significantly higher number might cause the
| #[code .html] files to render slowly.
+row
+cell #[code --gpu-id], #[code -g]
+cell option
+cell GPU to use, if any. Defaults to #[code -1] for CPU.
+row
+cell #[code --gold-preproc], #[code -G]
+cell flag
+cell Use gold preprocessing.
+h(3, "package") Package
p
| Generate a #[+a("/usage/training#models-generating") model Python package]
| from an existing model data directory. All data files are copied over.
| If the path to a meta.json is supplied, or a meta.json is found in the
| input directory, this file is used. Otherwise, the data can be entered
@ -336,8 +409,8 @@ p
| sure you're always using the latest versions. This means you need to be
| connected to the internet to use this command.
+code(false, "bash", "$").
spacy package [input_dir] [output_dir] [--meta] [--force]
+code(false, "bash", "$", false, false, true).
spacy package [input_dir] [output_dir] [--meta-path] [--create-meta] [--force]
+table(["Argument", "Type", "Description"])
+row
@ -353,14 +426,14 @@ p
+row
+cell #[code --meta-path], #[code -m]
+cell option
+cell Path to meta.json file (optional).
+cell #[+tag-new(2)] Path to meta.json file (optional).
+row
+cell #[code --create-meta], #[code -c]
+cell flag
+cell
| Create a meta.json file on the command line, even if one already
| exists in the directory.
| #[+tag-new(2)] Create a meta.json file on the command line, even
| if one already exists in the directory.
+row
+cell #[code --force], #[code -f]

@ -0,0 +1,91 @@
//- 💫 DOCS > API > TOP-LEVEL > COMPATIBILITY
p
| All Python code is written in an
| #[strong intersection of Python 2 and Python 3]. This is easy in Cython,
| but somewhat ugly in Python. Logic that deals with Python or platform
| compatibility only lives in #[code spacy.compat]. To distinguish them from
    | the built-in functions, replacement functions are suffixed with an
    | underscore, e.g. #[code unicode_]. For specific checks, spaCy uses the
| #[code six] and #[code ftfy] packages.
+aside-code("Example").
from spacy.compat import unicode_, json_dumps
compatible_unicode = unicode_('hello world')
compatible_json = json_dumps({'key': 'value'})
+table(["Name", "Python 2", "Python 3"])
+row
+cell #[code compat.bytes_]
+cell #[code str]
+cell #[code bytes]
+row
+cell #[code compat.unicode_]
+cell #[code unicode]
+cell #[code str]
+row
+cell #[code compat.basestring_]
+cell #[code basestring]
+cell #[code str]
+row
+cell #[code compat.input_]
+cell #[code raw_input]
+cell #[code input]
+row
+cell #[code compat.json_dumps]
+cell #[code ujson.dumps] with #[code .decode('utf8')]
+cell #[code ujson.dumps]
+row
+cell #[code compat.path2str]
+cell #[code str(path)] with #[code .decode('utf8')]
+cell #[code str(path)]
+h(3, "is_config") compat.is_config
+tag function
p
| Check if a specific configuration of Python version and operating system
| matches the user's setup. Mostly used to display targeted error messages.
+aside-code("Example").
from spacy.compat import is_config
if is_config(python2=True, windows=True):
print("You are using Python 2 on Windows.")
+table(["Name", "Type", "Description"])
+row
+cell #[code python2]
+cell bool
+cell spaCy is executed with Python 2.x.
+row
+cell #[code python3]
+cell bool
+cell spaCy is executed with Python 3.x.
+row
+cell #[code windows]
+cell bool
+cell spaCy is executed on Windows.
+row
+cell #[code linux]
+cell bool
+cell spaCy is executed on Linux.
+row
+cell #[code osx]
+cell bool
+cell spaCy is executed on OS X or macOS.
+row("foot")
+cell returns
+cell bool
+cell Whether the specified configuration matches the user's platform.

@ -1,14 +1,12 @@
//- 💫 DOCS > API > DISPLACY
include ../../_includes/_mixins
//- 💫 DOCS > API > TOP-LEVEL > DISPLACY
p
| As of v2.0, spaCy comes with a built-in visualization suite. For more
| info and examples, see the usage guide on
| #[+a("/docs/usage/visualizers") visualizing spaCy].
| #[+a("/usage/visualizers") visualizing spaCy].
+h(2, "serve") displacy.serve
+h(3, "displacy.serve") displacy.serve
+tag method
+tag-new(2)
@ -60,7 +58,7 @@ p
+cell bool
+cell
| Don't parse #[code Doc] and instead, expect a dict or list of
| dicts. #[+a("/docs/usage/visualizers#manual-usage") See here]
| dicts. #[+a("/usage/visualizers#manual-usage") See here]
| for formats and examples.
+cell #[code False]
@ -70,7 +68,7 @@ p
+cell Port to serve visualization.
+cell #[code 5000]
+h(2, "render") displacy.render
+h(3, "displacy.render") displacy.render
+tag method
+tag-new(2)
@ -127,24 +125,24 @@ p Render a dependency parse tree or named entity visualization.
+cell bool
+cell
| Don't parse #[code Doc] and instead, expect a dict or list of
| dicts. #[+a("/docs/usage/visualizers#manual-usage") See here]
| dicts. #[+a("/usage/visualizers#manual-usage") See here]
| for formats and examples.
+cell #[code False]
+footrow
+row("foot")
+cell returns
+cell unicode
+cell Rendered HTML markup.
+cell
+h(2, "options") Visualizer options
+h(3, "displacy_options") Visualizer options
p
| The #[code options] argument lets you specify additional settings for
| each visualizer. If a setting is not present in the options, the default
| value will be used.
+h(3, "options-dep") Dependency Visualizer options
+h(4, "options-dep") Dependency Visualizer options
+aside-code("Example").
options = {'compact': True, 'color': 'blue'}
@ -219,7 +217,7 @@ p
+cell Distance between words in px.
+cell #[code 175] / #[code 85] (compact)
+h(3, "options-ent") Named Entity Visualizer options
+h(4, "displacy_options-ent") Named Entity Visualizer options
+aside-code("Example").
options = {'ents': ['PERSON', 'ORG', 'PRODUCT'],
@ -244,6 +242,6 @@ p
p
| By default, displaCy comes with colours for all
| #[+a("/docs/api/annotation#named-entities") entity types supported by spaCy].
| #[+a("/api/annotation#named-entities") entity types supported by spaCy].
| If you're using custom entity types, you can use the #[code colors]
| setting to add your own colours for them.

@ -1,15 +1,13 @@
//- 💫 DOCS > API > SPACY
//- 💫 DOCS > API > TOP-LEVEL > SPACY
include ../../_includes/_mixins
+h(2, "load") spacy.load
+h(3, "spacy.load") spacy.load
+tag function
+tag-model
p
| Load a model via its #[+a("/docs/usage/models#usage") shortcut link],
| Load a model via its #[+a("/usage/models#usage") shortcut link],
| the name of an installed
| #[+a("/docs/usage/saving-loading#generating") model package], a unicode
| #[+a("/usage/training#models-generating") model package], a unicode
| path or a #[code Path]-like object. spaCy will try resolving the load
| argument in this order. If a model is loaded from a shortcut link or
| package name, spaCy will assume it's a Python package and import it and
@ -38,25 +36,57 @@ p
+cell list
+cell
| Names of pipeline components to
| #[+a("/docs/usage/language-processing-pipeline#disabling") disable].
| #[+a("/usage/processing-pipelines#disabling") disable].
+footrow
+row("foot")
+cell returns
+cell #[code Language]
+cell A #[code Language] object with the loaded model.
+infobox("⚠️ Deprecation note")
+infobox("Deprecation note", "⚠️")
.o-block
| As of spaCy 2.0, the #[code path] keyword argument is deprecated. spaCy
| will also raise an error if no model could be loaded and never just
| return an empty #[code Language] object. If you need a blank language,
| you need to import it explicitly (#[code from spacy.lang.en import English])
| or use #[+api("util#get_lang_class") #[code util.get_lang_class]].
| you can use the new function #[+api("spacy#blank") #[code spacy.blank()]]
| or import the class explicitly, e.g.
| #[code from spacy.lang.en import English].
+code-new nlp = spacy.load('/model')
+code-old nlp = spacy.load('en', path='/model')
+h(2, "info") spacy.info
+h(3, "spacy.blank") spacy.blank
+tag function
+tag-new(2)
p
| Create a blank model of a given language class. This function is the
| twin of #[code spacy.load()].
+aside-code("Example").
nlp_en = spacy.blank('en')
nlp_de = spacy.blank('de')
+table(["Name", "Type", "Description"])
+row
+cell #[code name]
+cell unicode
+cell ISO code of the language class to load.
+row
+cell #[code disable]
+cell list
+cell
| Names of pipeline components to
| #[+a("/usage/processing-pipelines#disabling") disable].
+row("foot")
+cell returns
+cell #[code Language]
+cell An empty #[code Language] object of the appropriate subclass.
+h(4, "spacy.info") spacy.info
+tag function
p
@ -83,13 +113,13 @@ p
+cell Print information as Markdown.
+h(2, "explain") spacy.explain
+h(3, "spacy.explain") spacy.explain
+tag function
p
| Get a description for a given POS tag, dependency label or entity type.
| For a list of available terms, see
| #[+src(gh("spacy", "spacy/glossary.py")) glossary.py].
| #[+src(gh("spacy", "spacy/glossary.py")) #[code glossary.py]].
+aside-code("Example").
spacy.explain('NORP')
@ -107,18 +137,18 @@ p
+cell unicode
+cell Term to explain.
+footrow
+row("foot")
+cell returns
+cell unicode
+cell The explanation, or #[code None] if not found in the glossary.
+h(2, "set_factory") spacy.set_factory
+h(3, "spacy.set_factory") spacy.set_factory
+tag function
+tag-new(2)
p
| Set a factory that returns a custom
| #[+a("/docs/usage/language-processing-pipeline") processing pipeline]
| #[+a("/usage/processing-pipelines") processing pipeline]
| component. Factories are useful for creating stateful components, especially ones which depend on shared data.
+aside-code("Example").

@ -1,10 +1,8 @@
//- 💫 DOCS > API > UTIL
include ../../_includes/_mixins
//- 💫 DOCS > API > TOP-LEVEL > UTIL
p
| spaCy comes with a small collection of utility functions located in
| #[+src(gh("spaCy", "spacy/util.py")) spacy/util.py].
| #[+src(gh("spaCy", "spacy/util.py")) #[code spacy/util.py]].
| Because utility functions are mostly intended for
| #[strong internal use within spaCy], their behaviour may change with
| future releases. The functions documented on this page should be safe
@ -12,7 +10,7 @@ p
| recommend having additional tests in place if your application depends on
| any of spaCy's utilities.
+h(2, "get_data_path") util.get_data_path
+h(3, "util.get_data_path") util.get_data_path
+tag function
p
@ -25,12 +23,12 @@ p
+cell bool
+cell Only return path if it exists, otherwise return #[code None].
+footrow
+row("foot")
+cell returns
+cell #[code Path] / #[code None]
+cell Data path or #[code None].
+h(2, "set_data_path") util.set_data_path
+h(3, "util.set_data_path") util.set_data_path
+tag function
p
@ -47,12 +45,12 @@ p
+cell unicode or #[code Path]
+cell Path to new data directory.
+h(2, "get_lang_class") util.get_lang_class
+h(3, "util.get_lang_class") util.get_lang_class
+tag function
p
| Import and load a #[code Language] class. Allows lazy-loading
| #[+a("/docs/usage/adding-languages") language data] and importing
| #[+a("/usage/adding-languages") language data] and importing
| languages using the two-letter language code.
+aside-code("Example").
@ -67,12 +65,12 @@ p
+cell unicode
+cell Two-letter language code, e.g. #[code 'en'].
+footrow
+row("foot")
+cell returns
+cell #[code Language]
+cell Language class.
+h(2, "load_model") util.load_model
+h(3, "util.load_model") util.load_model
+tag function
+tag-new(2)
@ -101,12 +99,12 @@ p
+cell -
+cell Specific overrides, like pipeline components to disable.
+footrow
+row("foot")
+cell returns
+cell #[code Language]
+cell #[code Language] class with the loaded model.
+h(2, "load_model_from_path") util.load_model_from_path
+h(3, "util.load_model_from_path") util.load_model_from_path
+tag function
+tag-new(2)
@ -139,18 +137,18 @@ p
+cell -
+cell Specific overrides, like pipeline components to disable.
+footrow
+row("foot")
+cell returns
+cell #[code Language]
+cell #[code Language] class with the loaded model.
+h(2, "load_model_from_init_py") util.load_model_from_init_py
+h(3, "util.load_model_from_init_py") util.load_model_from_init_py
+tag function
+tag-new(2)
p
| A helper function to use in the #[code load()] method of a model package's
| #[+src(gh("spacy-dev-resources", "templates/model/en_model_name/__init__.py")) __init__.py].
| #[+src(gh("spacy-dev-resources", "templates/model/en_model_name/__init__.py")) #[code __init__.py]].
+aside-code("Example").
from spacy.util import load_model_from_init_py
@ -169,12 +167,12 @@ p
+cell -
+cell Specific overrides, like pipeline components to disable.
+footrow
+row("foot")
+cell returns
+cell #[code Language]
+cell #[code Language] class with the loaded model.
+h(2, "get_model_meta") util.get_model_meta
+h(3, "util.get_model_meta") util.get_model_meta
+tag function
+tag-new(2)
@ -190,17 +188,17 @@ p
+cell unicode or #[code Path]
+cell Path to model directory.
+footrow
+row("foot")
+cell returns
+cell dict
+cell The model's meta data.
+h(2, "is_package") util.is_package
+h(3, "util.is_package") util.is_package
+tag function
p
    | Check if a string maps to a package installed via pip. Mainly used to
| validate #[+a("/docs/usage/models") model packages].
| validate #[+a("/usage/models") model packages].
+aside-code("Example").
util.is_package('en_core_web_sm') # True
@ -212,18 +210,18 @@ p
+cell unicode
+cell Name of package.
+footrow
+row("foot")
+cell returns
+cell #[code bool]
+cell #[code True] if installed package, #[code False] if not.
+h(2, "get_package_path") util.get_package_path
+h(3, "util.get_package_path") util.get_package_path
+tag function
+tag-new(2)
p
| Get path to an installed package. Mainly used to resolve the location of
| #[+a("/docs/usage/models") model packages]. Currently imports the package
| #[+a("/usage/models") model packages]. Currently imports the package
| to find its path.
+aside-code("Example").
@ -236,12 +234,12 @@ p
+cell unicode
+cell Name of installed package.
+footrow
+row("foot")
+cell returns
+cell #[code Path]
+cell Path to model package directory.
+h(2, "is_in_jupyter") util.is_in_jupyter
+h(3, "util.is_in_jupyter") util.is_in_jupyter
+tag function
+tag-new(2)
@ -257,17 +255,17 @@ p
return display(HTML(html))
+table(["Name", "Type", "Description"])
+footrow
+row("foot")
+cell returns
+cell bool
+cell #[code True] if in Jupyter, #[code False] if not.
+h(2, "update_exc") util.update_exc
+h(3, "util.update_exc") util.update_exc
+tag function
p
| Update, validate and overwrite
| #[+a("/docs/usage/adding-languages#tokenizer-exceptions") tokenizer exceptions].
| #[+a("/usage/adding-languages#tokenizer-exceptions") tokenizer exceptions].
| Used to combine global exceptions with custom, language-specific
| exceptions. Will raise an error if key doesn't match #[code ORTH] values.
@ -288,20 +286,20 @@ p
+cell dicts
+cell Exception dictionaries to add to the base exceptions, in order.
+footrow
+row("foot")
+cell returns
+cell dict
+cell Combined tokenizer exceptions.
+h(2, "prints") util.prints
+h(3, "util.prints") util.prints
+tag function
+tag-new(2)
p
| Print a formatted, text-wrapped message with optional title. If a text
| argument is a #[code Path], it's converted to a string. Should only
| be used for interactive components like the #[+api("cli") cli].
| be used for interactive components like the command-line interface.
+aside-code("Example").
data_path = Path('/some/path')

website/api/annotation.jade Normal file

@ -0,0 +1,131 @@
//- 💫 DOCS > API > ANNOTATION SPECS
include ../_includes/_mixins
p This document describes the target annotations spaCy is trained to predict.
+section("tokenization")
+h(2, "tokenization") Tokenization
p
| Tokenization standards are based on the
| #[+a("https://catalog.ldc.upenn.edu/LDC2013T19") OntoNotes 5] corpus.
| The tokenizer differs from most by including tokens for significant
| whitespace. Any sequence of whitespace characters beyond a single space
| (#[code ' ']) is included as a token.
+aside-code("Example").
from spacy.lang.en import English
nlp = English()
tokens = nlp('Some\nspaces and\ttab characters')
tokens_text = [t.text for t in tokens]
assert tokens_text == ['Some', '\n', 'spaces', ' ', 'and',
'\t', 'tab', 'characters']
p
    | The whitespace tokens are useful for much the same reason punctuation is:
| it's often an important delimiter in the text. By preserving it in the
| token output, we are able to maintain a simple alignment between the
| tokens and the original string, and we ensure that no information is
| lost during processing.
+section("sbd")
+h(2, "sentence-boundary") Sentence boundary detection
p
| Sentence boundaries are calculated from the syntactic parse tree, so
| features such as punctuation and capitalisation play an important but
| non-decisive role in determining the sentence boundaries. Usually this
| means that the sentence boundaries will at least coincide with clause
| boundaries, even given poorly punctuated text.
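p
    | For example, assuming a model with a dependency parser is installed
    | (#[code en_core_web_sm] is an assumption here), the sentence boundaries
    | derived from the parse can be read off #[code Doc.sents]:

+code.
    import spacy
    nlp = spacy.load('en_core_web_sm')
    doc = nlp(u'This is a sentence. This is another one.')
    print([sent.text for sent in doc.sents])
    # ['This is a sentence.', 'This is another one.']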
+section("pos-tagging")
+h(2, "pos-tagging") Part-of-speech Tagging
+aside("Tip: Understanding tags")
| You can also use #[code spacy.explain()] to get the description for the
| string representation of a tag. For example,
| #[code spacy.explain("RB")] will return "adverb".
include _annotation/_pos-tags
+section("lemmatization")
+h(2, "lemmatization") Lemmatization
p A "lemma" is the uninflected form of a word. In English, this means:
+list
+item #[strong Adjectives]: The form like "happy", not "happier" or "happiest"
+item #[strong Adverbs]: The form like "badly", not "worse" or "worst"
+item #[strong Nouns]: The form like "dog", not "dogs"; like "child", not "children"
+item #[strong Verbs]: The form like "write", not "writes", "writing", "wrote" or "written"
p
| The lemmatization data is taken from
| #[+a("https://wordnet.princeton.edu") WordNet]. However, we also add a
| special case for pronouns: all pronouns are lemmatized to the special
| token #[code -PRON-].
+infobox("About spaCy's custom pronoun lemma")
| Unlike verbs and common nouns, there's no clear base form of a personal
| pronoun. Should the lemma of "me" be "I", or should we normalize person
| as well, giving "it" — or maybe "he"? spaCy's solution is to introduce a
| novel symbol, #[code -PRON-], which is used as the lemma for
| all personal pronouns.
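p
    | A minimal sketch of the pronoun lemma, assuming an English model such
    | as #[code en_core_web_sm] is installed (the output comment is
    | illustrative):

+code.
    import spacy
    nlp = spacy.load('en_core_web_sm')
    doc = nlp(u'She told me about it')
    print([(t.text, t.lemma_) for t in doc])
    # the personal pronouns "She", "me" and "it" all receive the lemma '-PRON-'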
+section("dependency-parsing")
+h(2, "dependency-parsing") Syntactic Dependency Parsing
+aside("Tip: Understanding labels")
| You can also use #[code spacy.explain()] to get the description for the
| string representation of a label. For example,
| #[code spacy.explain("prt")] will return "particle".
include _annotation/_dep-labels
+section("named-entities")
+h(2, "named-entities") Named Entity Recognition
+aside("Tip: Understanding entity types")
| You can also use #[code spacy.explain()] to get the description for the
| string representation of an entity label. For example,
| #[code spacy.explain("LANGUAGE")] will return "any named language".
include _annotation/_named-entities
+h(3, "biluo") BILUO Scheme
include _annotation/_biluo
+section("training")
+h(2, "json-input") JSON input format for training
+under-construction
p spaCy takes training data in the following format:
+code("Example structure").
doc: {
id: string,
paragraphs: [{
raw: string,
sents: [int],
tokens: [{
start: int,
tag: string,
head: int,
dep: string
}],
ner: [{
start: int,
end: int,
label: string
}],
brackets: [{
start: int,
end: int,
label: string
}]
}]
}

@ -1,6 +1,6 @@
//- 💫 DOCS > API > BINDER
include ../../_includes/_mixins
include ../_includes/_mixins
p A container class for serializing collections of #[code Doc] objects.

@ -0,0 +1,5 @@
//- 💫 DOCS > API > DEPENDENCYPARSER
include ../_includes/_mixins
!=partial("pipe", { subclass: "DependencyParser", short: "parser", pipeline_id: "parser" })

Some files were not shown because too many files have changed in this diff.