diff --git a/bin/get_freqs.py b/bin/get_freqs.py deleted file mode 100755 index 54d90ef8c..000000000 --- a/bin/get_freqs.py +++ /dev/null @@ -1,93 +0,0 @@ -#!/usr/bin/env python - -from __future__ import unicode_literals, print_function - -import plac -import joblib -from os import path -import os -import bz2 -import ujson -from preshed.counter import PreshCounter -from joblib import Parallel, delayed -import io - -from spacy.en import English -from spacy.strings import StringStore -from spacy.attrs import ORTH -from spacy.tokenizer import Tokenizer -from spacy.vocab import Vocab - - -def iter_comments(loc): - with bz2.BZ2File(loc) as file_: - for line in file_: - yield ujson.loads(line) - - -def count_freqs(input_loc, output_loc): - print(output_loc) - vocab = English.default_vocab(get_lex_attr=None) - tokenizer = Tokenizer.from_dir(vocab, - path.join(English.default_data_dir(), 'tokenizer')) - - counts = PreshCounter() - for json_comment in iter_comments(input_loc): - doc = tokenizer(json_comment['body']) - doc.count_by(ORTH, counts=counts) - - with io.open(output_loc, 'w', 'utf8') as file_: - for orth, freq in counts: - string = tokenizer.vocab.strings[orth] - if not string.isspace(): - file_.write('%d\t%s\n' % (freq, string)) - - -def parallelize(func, iterator, n_jobs): - Parallel(n_jobs=n_jobs)(delayed(func)(*item) for item in iterator) - - -def merge_counts(locs, out_loc): - string_map = StringStore() - counts = PreshCounter() - for loc in locs: - with io.open(loc, 'r', encoding='utf8') as file_: - for line in file_: - freq, word = line.strip().split('\t', 1) - orth = string_map[word] - counts.inc(orth, int(freq)) - with io.open(out_loc, 'w', encoding='utf8') as file_: - for orth, count in counts: - string = string_map[orth] - file_.write('%d\t%s\n' % (count, string)) - - -@plac.annotations( - input_loc=("Location of input file list"), - freqs_dir=("Directory for frequency files"), - output_loc=("Location for output file"), - n_jobs=("Number of workers", "option", "n", int), - skip_existing=("Skip inputs where an output file exists", "flag", "s", bool), -) -def main(input_loc, freqs_dir, output_loc, n_jobs=2, skip_existing=False): - tasks = [] - outputs = [] - for input_path in open(input_loc): - input_path = input_path.strip() - if not input_path: - continue - filename = input_path.split('/')[-1] - output_path = path.join(freqs_dir, filename.replace('bz2', 'freq')) - outputs.append(output_path) - if not path.exists(output_path) or not skip_existing: - tasks.append((input_path, output_path)) - - if tasks: - parallelize(count_freqs, tasks, n_jobs) - - print("Merge") - merge_counts(outputs, output_loc) - - -if __name__ == '__main__': - plac.call(main) diff --git a/bin/munge_ewtb.py b/bin/munge_ewtb.py deleted file mode 100755 index 4e21ceb07..000000000 --- a/bin/munge_ewtb.py +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/env python -from __future__ import unicode_literals - -from xml.etree import cElementTree as ElementTree -import json -import re - -import plac -from pathlib import Path -from os import path - - -escaped_tokens = { - '-LRB-': '(', - '-RRB-': ')', - '-LSB-': '[', - '-RSB-': ']', - '-LCB-': '{', - '-RCB-': '}', -} - -def read_parses(parse_loc): - offset = 0 - doc = [] - for parse in open(str(parse_loc) + '.dep').read().strip().split('\n\n'): - parse = _adjust_token_ids(parse, offset) - offset += len(parse.split('\n')) - doc.append(parse) - return doc - -def _adjust_token_ids(parse, offset): - output = [] - for line in parse.split('\n'): - pieces = line.split() - pieces[0] 
= str(int(pieces[0]) + offset) - pieces[5] = str(int(pieces[5]) + offset) if pieces[5] != '0' else '0' - output.append('\t'.join(pieces)) - return '\n'.join(output) - - -def _fmt_doc(filename, paras): - return {'id': filename, 'paragraphs': [_fmt_para(*para) for para in paras]} - - -def _fmt_para(raw, sents): - return {'raw': raw, 'sentences': [_fmt_sent(sent) for sent in sents]} - - -def _fmt_sent(sent): - return { - 'tokens': [_fmt_token(*t.split()) for t in sent.strip().split('\n')], - 'brackets': []} - - -def _fmt_token(id_, word, hyph, pos, ner, head, dep, blank1, blank2, blank3): - head = int(head) - 1 - id_ = int(id_) - 1 - head = (head - id_) if head != -1 else 0 - return {'id': id_, 'orth': word, 'tag': pos, 'dep': dep, 'head': head} - - -tags_re = re.compile(r'<[\w\?/][^>]+>') -def main(out_dir, ewtb_dir='/usr/local/data/eng_web_tbk'): - ewtb_dir = Path(ewtb_dir) - out_dir = Path(out_dir) - if not out_dir.exists(): - out_dir.mkdir() - for genre_dir in ewtb_dir.joinpath('data').iterdir(): - #if 'answers' in str(genre_dir): continue - parse_dir = genre_dir.joinpath('penntree') - docs = [] - for source_loc in genre_dir.joinpath('source').joinpath('source_original').iterdir(): - filename = source_loc.parts[-1].replace('.sgm.sgm', '') - filename = filename.replace('.xml', '') - filename = filename.replace('.txt', '') - parse_loc = parse_dir.joinpath(filename + '.xml.tree') - parses = read_parses(parse_loc) - source = source_loc.open().read().strip() - if 'answers' in str(genre_dir): - source = tags_re.sub('', source).strip() - docs.append(_fmt_doc(filename, [[source, parses]])) - - out_loc = out_dir.joinpath(genre_dir.parts[-1] + '.json') - with open(str(out_loc), 'w') as out_file: - out_file.write(json.dumps(docs, indent=4)) - - -if __name__ == '__main__': - plac.call(main) diff --git a/bin/ner_tag.py b/bin/ner_tag.py deleted file mode 100644 index f990f21a1..000000000 --- a/bin/ner_tag.py +++ /dev/null @@ -1,32 +0,0 @@ -import io -import plac - -from spacy.en import English - - -def main(text_loc): - with io.open(text_loc, 'r', encoding='utf8') as file_: - text = file_.read() - NLU = English() - for paragraph in text.split('\n\n'): - tokens = NLU(paragraph) - - ent_starts = {} - ent_ends = {} - for span in tokens.ents: - ent_starts[span.start] = span.label_ - ent_ends[span.end] = span.label_ - - output = [] - for token in tokens: - if token.i in ent_starts: - output.append('<%s>' % ent_starts[token.i]) - output.append(token.orth_) - if (token.i+1) in ent_ends: - output.append('</%s>' % ent_ends[token.i+1]) - output.append('\n\n') - print ' '.join(output) - - -if __name__ == '__main__': - plac.call(main) diff --git a/bin/parser/conll_train.py b/bin/parser/conll_train.py deleted file mode 100755 index 8075dcd8a..000000000 --- a/bin/parser/conll_train.py +++ /dev/null @@ -1,157 +0,0 @@ -#!/usr/bin/env python -from __future__ import division -from __future__ import unicode_literals - -import os -from os import path -import shutil -import io -import random -import time -import gzip - -import plac -import cProfile -import pstats - -import spacy.util -from spacy.en import English -from spacy.gold import GoldParse - -from spacy.syntax.util import Config -from spacy.syntax.arc_eager import ArcEager -from spacy.syntax.parser import Parser -from spacy.scorer import Scorer -from spacy.tagger import Tagger - -# Last updated for spaCy v0.97 - - -def read_conll(file_): - """Read a standard CoNLL/MALT-style format""" - sents = [] - for sent_str in file_.read().strip().split('\n\n'): - ids = [] - words =
[] - heads = [] - labels = [] - tags = [] - for i, line in enumerate(sent_str.split('\n')): - word, pos_string, head_idx, label = _parse_line(line) - words.append(word) - if head_idx < 0: - head_idx = i - ids.append(i) - heads.append(head_idx) - labels.append(label) - tags.append(pos_string) - text = ' '.join(words) - annot = (ids, words, tags, heads, labels, ['O'] * len(ids)) - sents.append((None, [(annot, [])])) - return sents - - -def _parse_line(line): - pieces = line.split() - if len(pieces) == 4: - word, pos, head_idx, label = pieces - head_idx = int(head_idx) - elif len(pieces) == 15: - id_ = int(pieces[0].split('_')[-1]) - word = pieces[1] - pos = pieces[4] - head_idx = int(pieces[8])-1 - label = pieces[10] - else: - id_ = int(pieces[0].split('_')[-1]) - word = pieces[1] - pos = pieces[4] - head_idx = int(pieces[6])-1 - label = pieces[7] - if head_idx == 0: - label = 'ROOT' - return word, pos, head_idx, label - - -def score_model(scorer, nlp, raw_text, annot_tuples, verbose=False): - tokens = nlp.tokenizer.tokens_from_list(annot_tuples[1]) - nlp.tagger(tokens) - nlp.parser(tokens) - gold = GoldParse(tokens, annot_tuples, make_projective=False) - scorer.score(tokens, gold, verbose=verbose, punct_labels=('--', 'p', 'punct')) - - -def train(Language, gold_tuples, model_dir, n_iter=15, feat_set=u'basic', seed=0, - gold_preproc=False, force_gold=False): - dep_model_dir = path.join(model_dir, 'deps') - pos_model_dir = path.join(model_dir, 'pos') - if path.exists(dep_model_dir): - shutil.rmtree(dep_model_dir) - if path.exists(pos_model_dir): - shutil.rmtree(pos_model_dir) - os.mkdir(dep_model_dir) - os.mkdir(pos_model_dir) - - Config.write(dep_model_dir, 'config', features=feat_set, seed=seed, - labels=ArcEager.get_labels(gold_tuples)) - - nlp = Language(data_dir=model_dir, tagger=False, parser=False, entity=False) - nlp.tagger = Tagger.blank(nlp.vocab, Tagger.default_templates()) - nlp.parser = Parser.from_dir(dep_model_dir, nlp.vocab.strings, ArcEager) - - print("Itn.\tP.Loss\tUAS\tNER F.\tTag %\tToken %") - for itn in range(n_iter): - scorer = Scorer() - loss = 0 - for _, sents in gold_tuples: - for annot_tuples, _ in sents: - if len(annot_tuples[1]) == 1: - continue - - score_model(scorer, nlp, None, annot_tuples, verbose=False) - - tokens = nlp.tokenizer.tokens_from_list(annot_tuples[1]) - nlp.tagger(tokens) - gold = GoldParse(tokens, annot_tuples, make_projective=True) - if not gold.is_projective: - raise Exception( - "Non-projective sentence in training, after we should " - "have enforced projectivity: %s" % annot_tuples - ) - - loss += nlp.parser.train(tokens, gold) - nlp.tagger.train(tokens, gold.tags) - random.shuffle(gold_tuples) - print('%d:\t%d\t%.3f\t%.3f\t%.3f' % (itn, loss, scorer.uas, - scorer.tags_acc, scorer.token_acc)) - print('end training') - nlp.end_training(model_dir) - print('done') - - -@plac.annotations( - train_loc=("Location of CoNLL 09 formatted training file"), - dev_loc=("Location of CoNLL 09 formatted development file"), - model_dir=("Location of output model directory"), - eval_only=("Skip training, and only evaluate", "flag", "e", bool), - n_iter=("Number of training iterations", "option", "i", int), -) -def main(train_loc, dev_loc, model_dir, n_iter=15): - with io.open(train_loc, 'r', encoding='utf8') as file_: - train_sents = read_conll(file_) - if not eval_only: - train(English, train_sents, model_dir, n_iter=n_iter) - nlp = English(data_dir=model_dir) - dev_sents = read_conll(io.open(dev_loc, 'r', encoding='utf8')) - scorer = Scorer() - for _, sents 
in dev_sents: - for annot_tuples, _ in sents: - score_model(scorer, nlp, None, annot_tuples) - print('TOK', 100-scorer.token_acc) - print('POS', scorer.tags_acc) - print('UAS', scorer.uas) - print('LAS', scorer.las) - - -if __name__ == '__main__': - plac.call(main) diff --git a/bin/parser/train.py b/bin/parser/train.py deleted file mode 100755 index 26b545b6d..000000000 --- a/bin/parser/train.py +++ /dev/null @@ -1,187 +0,0 @@ -#!/usr/bin/env python -from __future__ import division -from __future__ import unicode_literals -from __future__ import print_function - -import os -from os import path -import shutil -import io -import random - -import plac -import re - -import spacy.util - -from spacy.syntax.util import Config -from spacy.gold import read_json_file -from spacy.gold import GoldParse -from spacy.gold import merge_sents - -from spacy.scorer import Scorer - -from spacy.syntax.arc_eager import ArcEager -from spacy.syntax.ner import BiluoPushDown -from spacy.tagger import Tagger -from spacy.syntax.parser import Parser -from spacy.syntax.nonproj import PseudoProjectivity - - -def _corrupt(c, noise_level): - if random.random() >= noise_level: - return c - elif c == ' ': - return '\n' - elif c == '\n': - return ' ' - elif c in ['.', "'", "!", "?"]: - return '' - else: - return c.lower() - - -def add_noise(orig, noise_level): - if random.random() >= noise_level: - return orig - elif type(orig) == list: - corrupted = [_corrupt(word, noise_level) for word in orig] - corrupted = [w for w in corrupted if w] - return corrupted - else: - return ''.join(_corrupt(c, noise_level) for c in orig) - - -def score_model(scorer, nlp, raw_text, annot_tuples, verbose=False): - if raw_text is None: - tokens = nlp.tokenizer.tokens_from_list(annot_tuples[1]) - else: - tokens = nlp.tokenizer(raw_text) - nlp.tagger(tokens) - nlp.entity(tokens) - nlp.parser(tokens) - gold = GoldParse(tokens, annot_tuples) - scorer.score(tokens, gold, verbose=verbose) - - -def train(Language, train_data, dev_data, model_dir, tagger_cfg, parser_cfg, entity_cfg, - n_iter=15, seed=0, gold_preproc=False, n_sents=0, corruption_level=0): - print("Itn.\tN weight\tN feats\tUAS\tNER F.\tTag %\tToken %") - format_str = '{:d}\t{:d}\t{:d}\t{uas:.3f}\t{ents_f:.3f}\t{tags_acc:.3f}\t{token_acc:.3f}' - with Language.train(model_dir, train_data, - tagger_cfg, parser_cfg, entity_cfg) as trainer: - loss = 0 - for itn, epoch in enumerate(trainer.epochs(n_iter, gold_preproc=gold_preproc, - augment_data=None)): - for doc, gold in epoch: - trainer.update(doc, gold) - dev_scores = trainer.evaluate(dev_data, gold_preproc=gold_preproc) - print(format_str.format(itn, trainer.nlp.parser.model.nr_weight, - trainer.nlp.parser.model.nr_active_feat, **dev_scores.scores)) - - -def evaluate(Language, gold_tuples, model_dir, gold_preproc=False, verbose=False, - beam_width=None, cand_preproc=None): - print("Load parser", model_dir) - nlp = Language(path=model_dir) - if nlp.lang == 'de': - nlp.vocab.morphology.lemmatizer = lambda string,pos: set([string]) - if beam_width is not None: - nlp.parser.cfg.beam_width = beam_width - scorer = Scorer() - for raw_text, sents in gold_tuples: - if gold_preproc: - raw_text = None - else: - sents = merge_sents(sents) - for annot_tuples, brackets in sents: - if raw_text is None: - tokens = nlp.tokenizer.tokens_from_list(annot_tuples[1]) - nlp.tagger(tokens) - nlp.parser(tokens) - nlp.entity(tokens) - else: - tokens = nlp(raw_text) - gold = GoldParse.from_annot_tuples(tokens, annot_tuples) - scorer.score(tokens, gold, 
verbose=verbose) - return scorer - - -def write_parses(Language, dev_loc, model_dir, out_loc): - nlp = Language(data_dir=model_dir) - gold_tuples = read_json_file(dev_loc) - scorer = Scorer() - out_file = io.open(out_loc, 'w', 'utf8') - for raw_text, sents in gold_tuples: - sents = _merge_sents(sents) - for annot_tuples, brackets in sents: - if raw_text is None: - tokens = nlp.tokenizer.tokens_from_list(annot_tuples[1]) - nlp.tagger(tokens) - nlp.entity(tokens) - nlp.parser(tokens) - else: - tokens = nlp(raw_text) - #gold = GoldParse(tokens, annot_tuples) - #scorer.score(tokens, gold, verbose=False) - for sent in tokens.sents: - for t in sent: - if not t.is_space: - out_file.write( - '%d\t%s\t%s\t%s\t%s\n' % (t.i, t.orth_, t.tag_, t.head.orth_, t.dep_) - ) - out_file.write('\n') - - -@plac.annotations( - language=("The language to train", "positional", None, str, ['en','de', 'zh']), - train_loc=("Location of training file or directory"), - dev_loc=("Location of development file or directory"), - model_dir=("Location of output model directory",), - eval_only=("Skip training, and only evaluate", "flag", "e", bool), - corruption_level=("Amount of noise to add to training data", "option", "c", float), - gold_preproc=("Use gold-standard sentence boundaries in training?", "flag", "g", bool), - out_loc=("Out location", "option", "o", str), - n_sents=("Number of training sentences", "option", "n", int), - n_iter=("Number of training iterations", "option", "i", int), - verbose=("Verbose error reporting", "flag", "v", bool), - debug=("Debug mode", "flag", "d", bool), - pseudoprojective=("Use pseudo-projective parsing", "flag", "p", bool), - L1=("L1 regularization penalty", "option", "L", float), -) -def main(language, train_loc, dev_loc, model_dir, n_sents=0, n_iter=15, out_loc="", verbose=False, - debug=False, corruption_level=0.0, gold_preproc=False, eval_only=False, pseudoprojective=False, - L1=1e-6): - parser_cfg = dict(locals()) - tagger_cfg = dict(locals()) - entity_cfg = dict(locals()) - - lang = spacy.util.get_lang_class(language) - - parser_cfg['features'] = lang.Defaults.parser_features - entity_cfg['features'] = lang.Defaults.entity_features - - if not eval_only: - gold_train = list(read_json_file(train_loc)) - gold_dev = list(read_json_file(dev_loc)) - if n_sents > 0: - gold_train = gold_train[:n_sents] - train(lang, gold_train, gold_dev, model_dir, tagger_cfg, parser_cfg, entity_cfg, - n_sents=n_sents, gold_preproc=gold_preproc, corruption_level=corruption_level, - n_iter=n_iter) - if out_loc: - write_parses(lang, dev_loc, model_dir, out_loc) - scorer = evaluate(lang, list(read_json_file(dev_loc)), - model_dir, gold_preproc=gold_preproc, verbose=verbose) - print('TOK', scorer.token_acc) - print('POS', scorer.tags_acc) - print('UAS', scorer.uas) - print('LAS', scorer.las) - - print('NER P', scorer.ents_p) - print('NER R', scorer.ents_r) - print('NER F', scorer.ents_f) - - -if __name__ == '__main__': - plac.call(main) diff --git a/bin/parser/train_ud.py b/bin/parser/train_ud.py deleted file mode 100644 index 53ef906d5..000000000 --- a/bin/parser/train_ud.py +++ /dev/null @@ -1,201 +0,0 @@ -from __future__ import unicode_literals, print_function -import plac -import json -import random -import pathlib - -from spacy.tokens import Doc -from spacy.syntax.nonproj import PseudoProjectivity -from spacy.language import Language -from spacy.gold import GoldParse -from spacy.tagger import Tagger -from spacy.pipeline import DependencyParser, TokenVectorEncoder -from spacy.syntax.parser import 
get_templates -from spacy.syntax.arc_eager import ArcEager -from spacy.scorer import Scorer -from spacy.language_data.tag_map import TAG_MAP as DEFAULT_TAG_MAP -import spacy.attrs -import io -from thinc.neural.ops import CupyOps -from thinc.neural import Model -from spacy.es import Spanish -from spacy.attrs import POS - - -from thinc.neural import Model - - -try: - import cupy - from thinc.neural.ops import CupyOps -except: - cupy = None - - -def read_conllx(loc, n=0): - with io.open(loc, 'r', encoding='utf8') as file_: - text = file_.read() - i = 0 - for sent in text.strip().split('\n\n'): - lines = sent.strip().split('\n') - if lines: - while lines[0].startswith('#'): - lines.pop(0) - tokens = [] - for line in lines: - id_, word, lemma, pos, tag, morph, head, dep, _1, \ - _2 = line.split('\t') - if '-' in id_ or '.' in id_: - continue - try: - id_ = int(id_) - 1 - head = (int(head) - 1) if head != '0' else id_ - dep = 'ROOT' if dep == 'root' else dep #'unlabelled' - tag = pos+'__'+dep+'__'+morph - Spanish.Defaults.tag_map[tag] = {POS: pos} - tokens.append((id_, word, tag, head, dep, 'O')) - except: - raise - tuples = [list(t) for t in zip(*tokens)] - yield (None, [[tuples, []]]) - i += 1 - if n >= 1 and i >= n: - break - - -def score_model(vocab, encoder, parser, Xs, ys, verbose=False): - scorer = Scorer() - correct = 0. - total = 0. - for doc, gold in zip(Xs, ys): - doc = Doc(vocab, words=[w.text for w in doc]) - encoder(doc) - parser(doc) - PseudoProjectivity.deprojectivize(doc) - scorer.score(doc, gold, verbose=verbose) - for token, tag in zip(doc, gold.tags): - if '_' in token.tag_: - univ_guess, _ = token.tag_.split('_', 1) - else: - univ_guess = '' - univ_truth, _ = tag.split('_', 1) - correct += univ_guess == univ_truth - total += 1 - return scorer - - -def organize_data(vocab, train_sents): - Xs = [] - ys = [] - for _, doc_sents in train_sents: - for (ids, words, tags, heads, deps, ner), _ in doc_sents: - doc = Doc(vocab, words=words) - gold = GoldParse(doc, tags=tags, heads=heads, deps=deps) - Xs.append(doc) - ys.append(gold) - return Xs, ys - - -def main(lang_name, train_loc, dev_loc, model_dir, clusters_loc=None): - LangClass = spacy.util.get_lang_class(lang_name) - train_sents = list(read_conllx(train_loc)) - dev_sents = list(read_conllx(dev_loc)) - train_sents = PseudoProjectivity.preprocess_training_data(train_sents) - - actions = ArcEager.get_actions(gold_parses=train_sents) - features = get_templates('basic') - - model_dir = pathlib.Path(model_dir) - if not model_dir.exists(): - model_dir.mkdir() - if not (model_dir / 'deps').exists(): - (model_dir / 'deps').mkdir() - if not (model_dir / 'pos').exists(): - (model_dir / 'pos').mkdir() - with (model_dir / 'deps' / 'config.json').open('wb') as file_: - file_.write( - json.dumps( - {'pseudoprojective': True, 'labels': actions, 'features': features}).encode('utf8')) - - vocab = LangClass.Defaults.create_vocab() - if not (model_dir / 'vocab').exists(): - (model_dir / 'vocab').mkdir() - else: - if (model_dir / 'vocab' / 'strings.json').exists(): - with (model_dir / 'vocab' / 'strings.json').open() as file_: - vocab.strings.load(file_) - if (model_dir / 'vocab' / 'lexemes.bin').exists(): - vocab.load_lexemes(model_dir / 'vocab' / 'lexemes.bin') - - if clusters_loc is not None: - clusters_loc = pathlib.Path(clusters_loc) - with clusters_loc.open() as file_: - for line in file_: - try: - cluster, word, freq = line.split() - except ValueError: - continue - lex = vocab[word] - lex.cluster = int(cluster[::-1], 2) - # Populate vocab - 
for _, doc_sents in train_sents: - for (ids, words, tags, heads, deps, ner), _ in doc_sents: - for word in words: - _ = vocab[word] - for dep in deps: - _ = vocab[dep] - for tag in tags: - _ = vocab[tag] - if vocab.morphology.tag_map: - for tag in tags: - vocab.morphology.tag_map[tag] = {POS: tag.split('__', 1)[0]} - tagger = Tagger(vocab) - encoder = TokenVectorEncoder(vocab, width=64) - parser = DependencyParser(vocab, actions=actions, features=features, L1=0.0) - - Xs, ys = organize_data(vocab, train_sents) - dev_Xs, dev_ys = organize_data(vocab, dev_sents) - with encoder.model.begin_training(Xs[:100], ys[:100]) as (trainer, optimizer): - docs = list(Xs) - for doc in docs: - encoder(doc) - nn_loss = [0.] - def track_progress(): - with encoder.tagger.use_params(optimizer.averages): - with parser.model.use_params(optimizer.averages): - scorer = score_model(vocab, encoder, parser, dev_Xs, dev_ys) - itn = len(nn_loss) - print('%d:\t%.3f\t%.3f\t%.3f' % (itn, nn_loss[-1], scorer.uas, scorer.tags_acc)) - nn_loss.append(0.) - track_progress() - trainer.each_epoch.append(track_progress) - trainer.batch_size = 24 - trainer.nb_epoch = 40 - for docs, golds in trainer.iterate(Xs, ys, progress_bar=True): - docs = [Doc(vocab, words=[w.text for w in doc]) for doc in docs] - tokvecs, upd_tokvecs = encoder.begin_update(docs) - for doc, tokvec in zip(docs, tokvecs): - doc.tensor = tokvec - d_tokvecs = parser.update(docs, golds, sgd=optimizer) - upd_tokvecs(d_tokvecs, sgd=optimizer) - encoder.update(docs, golds, sgd=optimizer) - nlp = LangClass(vocab=vocab, parser=parser) - scorer = score_model(vocab, encoder, parser, read_conllx(dev_loc)) - print('%d:\t%.3f\t%.3f\t%.3f' % (itn, scorer.uas, scorer.las, scorer.tags_acc)) - #nlp.end_training(model_dir) - #scorer = score_model(vocab, tagger, parser, read_conllx(dev_loc)) - #print('%d:\t%.3f\t%.3f\t%.3f' % (itn, scorer.uas, scorer.las, scorer.tags_acc)) - - -if __name__ == '__main__': - import cProfile - import pstats - if 1: - plac.call(main) - else: - cProfile.runctx("plac.call(main)", globals(), locals(), "Profile.prof") - s = pstats.Stats("Profile.prof") - s.strip_dirs().sort_stats("time").print_stats() - - - plac.call(main) diff --git a/bin/prepare_treebank.py b/bin/prepare_treebank.py deleted file mode 100644 index f9f4eec21..000000000 --- a/bin/prepare_treebank.py +++ /dev/null @@ -1,194 +0,0 @@ -"""Convert OntoNotes into a json format. 
- -doc: { - id: string, - paragraphs: [{ - raw: string, - sents: [int], - tokens: [{ - start: int, - tag: string, - head: int, - dep: string}], - ner: [{ - start: int, - end: int, - label: string}], - brackets: [{ - start: int, - end: int, - label: string}]}]} - -Consumes output of spacy/munge/align_raw.py -""" -from __future__ import unicode_literals -import plac -import json -from os import path -import os -import re -import io -from collections import defaultdict - -from spacy.munge import read_ptb -from spacy.munge import read_conll -from spacy.munge import read_ner - - -def _iter_raw_files(raw_loc): - files = json.load(open(raw_loc)) - for f in files: - yield f - - -def format_doc(file_id, raw_paras, ptb_text, dep_text, ner_text): - ptb_sents = read_ptb.split(ptb_text) - dep_sents = read_conll.split(dep_text) - if len(ptb_sents) != len(dep_sents): - return None - if ner_text is not None: - ner_sents = read_ner.split(ner_text) - else: - ner_sents = [None] * len(ptb_sents) - - i = 0 - doc = {'id': file_id} - if raw_paras is None: - doc['paragraphs'] = [format_para(None, ptb_sents, dep_sents, ner_sents)] - #for ptb_sent, dep_sent, ner_sent in zip(ptb_sents, dep_sents, ner_sents): - # doc['paragraphs'].append(format_para(None, [ptb_sent], [dep_sent], [ner_sent])) - else: - doc['paragraphs'] = [] - for raw_sents in raw_paras: - para = format_para( - ' '.join(raw_sents).replace('', ''), - ptb_sents[i:i+len(raw_sents)], - dep_sents[i:i+len(raw_sents)], - ner_sents[i:i+len(raw_sents)]) - if para['sentences']: - doc['paragraphs'].append(para) - i += len(raw_sents) - return doc - - -def format_para(raw_text, ptb_sents, dep_sents, ner_sents): - para = {'raw': raw_text, 'sentences': []} - offset = 0 - assert len(ptb_sents) == len(dep_sents) == len(ner_sents) - for ptb_text, dep_text, ner_text in zip(ptb_sents, dep_sents, ner_sents): - _, deps = read_conll.parse(dep_text, strip_bad_periods=True) - if deps and 'VERB' in [t['tag'] for t in deps]: - continue - if ner_text is not None: - _, ner = read_ner.parse(ner_text, strip_bad_periods=True) - else: - ner = ['-' for _ in deps] - _, brackets = read_ptb.parse(ptb_text, strip_bad_periods=True) - # Necessary because the ClearNLP converter deletes EDITED words. 
- if len(ner) != len(deps): - ner = ['-' for _ in deps] - para['sentences'].append(format_sentence(deps, ner, brackets)) - return para - - -def format_sentence(deps, ner, brackets): - sent = {'tokens': [], 'brackets': []} - for token_id, (token, token_ent) in enumerate(zip(deps, ner)): - sent['tokens'].append(format_token(token_id, token, token_ent)) - - for label, start, end in brackets: - if start != end: - sent['brackets'].append({ - 'label': label, - 'first': start, - 'last': (end-1)}) - return sent - - -def format_token(token_id, token, ner): - assert token_id == token['id'] - head = (token['head'] - token_id) if token['head'] != -1 else 0 - return { - 'id': token_id, - 'orth': token['word'], - 'tag': token['tag'], - 'head': head, - 'dep': token['dep'], - 'ner': ner} - - -def read_file(*pieces): - loc = path.join(*pieces) - if not path.exists(loc): - return None - else: - return io.open(loc, 'r', encoding='utf8').read().strip() - - -def get_file_names(section_dir, subsection): - filenames = [] - for fn in os.listdir(path.join(section_dir, subsection)): - filenames.append(fn.rsplit('.', 1)[0]) - return list(sorted(set(filenames))) - - -def read_wsj_with_source(onto_dir, raw_dir): - # Now do WSJ, with source alignment - onto_dir = path.join(onto_dir, 'data', 'english', 'annotations', 'nw', 'wsj') - docs = {} - for i in range(25): - section = str(i) if i >= 10 else ('0' + str(i)) - raw_loc = path.join(raw_dir, 'wsj%s.json' % section) - for j, (filename, raw_paras) in enumerate(_iter_raw_files(raw_loc)): - if section == '00': - j += 1 - if section == '04' and filename == '55': - continue - ptb = read_file(onto_dir, section, '%s.parse' % filename) - dep = read_file(onto_dir, section, '%s.parse.dep' % filename) - ner = read_file(onto_dir, section, '%s.name' % filename) - if ptb is not None and dep is not None: - docs[filename] = format_doc(filename, raw_paras, ptb, dep, ner) - return docs - - -def get_doc(onto_dir, file_path, wsj_docs): - filename = file_path.rsplit('/', 1)[1] - if filename in wsj_docs: - return wsj_docs[filename] - else: - ptb = read_file(onto_dir, file_path + '.parse') - dep = read_file(onto_dir, file_path + '.parse.dep') - ner = read_file(onto_dir, file_path + '.name') - if ptb is not None and dep is not None: - return format_doc(filename, None, ptb, dep, ner) - else: - return None - - -def read_ids(loc): - return open(loc).read().strip().split('\n') - - -def main(onto_dir, raw_dir, out_dir): - wsj_docs = read_wsj_with_source(onto_dir, raw_dir) - - for partition in ('train', 'test', 'development'): - ids = read_ids(path.join(onto_dir, '%s.id' % partition)) - docs_by_genre = defaultdict(list) - for file_path in ids: - doc = get_doc(onto_dir, file_path, wsj_docs) - if doc is not None: - genre = file_path.split('/')[3] - docs_by_genre[genre].append(doc) - part_dir = path.join(out_dir, partition) - if not path.exists(part_dir): - os.mkdir(part_dir) - for genre, docs in sorted(docs_by_genre.items()): - out_loc = path.join(part_dir, genre + '.json') - with open(out_loc, 'w') as file_: - json.dump(docs, file_, indent=4) - - -if __name__ == '__main__': - plac.call(main) diff --git a/bin/prepare_vecs.py b/bin/prepare_vecs.py deleted file mode 100644 index b55dafee3..000000000 --- a/bin/prepare_vecs.py +++ /dev/null @@ -1,13 +0,0 @@ -"""Read a vector file, and prepare it as binary data, for easy consumption""" - -import plac - -from spacy.vocab import write_binary_vectors - - -def main(in_loc, out_loc): - write_binary_vectors(in_loc, out_loc) - - -if __name__ == '__main__': - 
plac.call(main) diff --git a/bin/tagger/train.py b/bin/tagger/train.py deleted file mode 100755 index 9cd8cc011..000000000 --- a/bin/tagger/train.py +++ /dev/null @@ -1,175 +0,0 @@ -#!/usr/bin/env python -from __future__ import division -from __future__ import unicode_literals -from __future__ import print_function - -import os -from os import path -import shutil -import codecs -import random - -import plac -import re - -import spacy.util -from spacy.en import English - -from spacy.tagger import Tagger - -from spacy.syntax.util import Config -from spacy.gold import read_json_file -from spacy.gold import GoldParse - -from spacy.scorer import Scorer - - -def score_model(scorer, nlp, raw_text, annot_tuples): - if raw_text is None: - tokens = nlp.tokenizer.tokens_from_list(annot_tuples[1]) - else: - tokens = nlp.tokenizer(raw_text) - nlp.tagger(tokens) - gold = GoldParse(tokens, annot_tuples) - scorer.score(tokens, gold) - - -def _merge_sents(sents): - m_deps = [[], [], [], [], [], []] - m_brackets = [] - i = 0 - for (ids, words, tags, heads, labels, ner), brackets in sents: - m_deps[0].extend(id_ + i for id_ in ids) - m_deps[1].extend(words) - m_deps[2].extend(tags) - m_deps[3].extend(head + i for head in heads) - m_deps[4].extend(labels) - m_deps[5].extend(ner) - m_brackets.extend((b['first'] + i, b['last'] + i, b['label']) for b in brackets) - i += len(ids) - return [(m_deps, m_brackets)] - - -def train(Language, gold_tuples, model_dir, n_iter=15, feat_set=u'basic', - seed=0, gold_preproc=False, n_sents=0, corruption_level=0, - beam_width=1, verbose=False, - use_orig_arc_eager=False): - if n_sents > 0: - gold_tuples = gold_tuples[:n_sents] - - templates = Tagger.default_templates() - nlp = Language(data_dir=model_dir, tagger=False) - nlp.tagger = Tagger.blank(nlp.vocab, templates) - - print("Itn.\tP.Loss\tUAS\tNER F.\tTag %\tToken %") - for itn in range(n_iter): - scorer = Scorer() - loss = 0 - for raw_text, sents in gold_tuples: - if gold_preproc: - raw_text = None - else: - sents = _merge_sents(sents) - for annot_tuples, ctnt in sents: - words = annot_tuples[1] - gold_tags = annot_tuples[2] - score_model(scorer, nlp, raw_text, annot_tuples) - if raw_text is None: - tokens = nlp.tokenizer.tokens_from_list(words) - else: - tokens = nlp.tokenizer(raw_text) - loss += nlp.tagger.train(tokens, gold_tags) - random.shuffle(gold_tuples) - print('%d:\t%d\t%.3f\t%.3f\t%.3f\t%.3f' % (itn, loss, scorer.uas, scorer.ents_f, - scorer.tags_acc, - scorer.token_acc)) - nlp.end_training(model_dir) - -def evaluate(Language, gold_tuples, model_dir, gold_preproc=False, verbose=False, - beam_width=None): - nlp = Language(data_dir=model_dir) - if beam_width is not None: - nlp.parser.cfg.beam_width = beam_width - scorer = Scorer() - for raw_text, sents in gold_tuples: - if gold_preproc: - raw_text = None - else: - sents = _merge_sents(sents) - for annot_tuples, brackets in sents: - if raw_text is None: - tokens = nlp.tokenizer.tokens_from_list(annot_tuples[1]) - nlp.tagger(tokens) - nlp.entity(tokens) - nlp.parser(tokens) - else: - tokens = nlp(raw_text, merge_mwes=False) - gold = GoldParse(tokens, annot_tuples) - scorer.score(tokens, gold, verbose=verbose) - return scorer - - -def write_parses(Language, dev_loc, model_dir, out_loc, beam_width=None): - nlp = Language(data_dir=model_dir) - if beam_width is not None: - nlp.parser.cfg.beam_width = beam_width - gold_tuples = read_json_file(dev_loc) - scorer = Scorer() - out_file = codecs.open(out_loc, 'w', 'utf8') - for raw_text, sents in gold_tuples: - sents = 
_merge_sents(sents) - for annot_tuples, brackets in sents: - if raw_text is None: - tokens = nlp.tokenizer.tokens_from_list(annot_tuples[1]) - nlp.tagger(tokens) - nlp.entity(tokens) - nlp.parser(tokens) - else: - tokens = nlp(raw_text, merge_mwes=False) - gold = GoldParse(tokens, annot_tuples) - scorer.score(tokens, gold, verbose=False) - for t in tokens: - out_file.write( - '%s\t%s\t%s\t%s\n' % (t.orth_, t.tag_, t.head.orth_, t.dep_) - ) - return scorer - - -@plac.annotations( - train_loc=("Location of training file or directory"), - dev_loc=("Location of development file or directory"), - model_dir=("Location of output model directory",), - eval_only=("Skip training, and only evaluate", "flag", "e", bool), - corruption_level=("Amount of noise to add to training data", "option", "c", float), - gold_preproc=("Use gold-standard sentence boundaries in training?", "flag", "g", bool), - out_loc=("Out location", "option", "o", str), - n_sents=("Number of training sentences", "option", "n", int), - n_iter=("Number of training iterations", "option", "i", int), - verbose=("Verbose error reporting", "flag", "v", bool), - debug=("Debug mode", "flag", "d", bool), -) -def main(train_loc, dev_loc, model_dir, n_sents=0, n_iter=15, out_loc="", verbose=False, - debug=False, corruption_level=0.0, gold_preproc=False, eval_only=False): - if not eval_only: - gold_train = list(read_json_file(train_loc)) - train(English, gold_train, model_dir, - feat_set='basic' if not debug else 'debug', - gold_preproc=gold_preproc, n_sents=n_sents, - corruption_level=corruption_level, n_iter=n_iter, - verbose=verbose) - #if out_loc: - # write_parses(English, dev_loc, model_dir, out_loc, beam_width=beam_width) - scorer = evaluate(English, list(read_json_file(dev_loc)), - model_dir, gold_preproc=gold_preproc, verbose=verbose) - print('TOK', scorer.token_acc) - print('POS', scorer.tags_acc) - print('UAS', scorer.uas) - print('LAS', scorer.las) - - print('NER P', scorer.ents_p) - print('NER R', scorer.ents_r) - print('NER F', scorer.ents_f) - - -if __name__ == '__main__': - plac.call(main) diff --git a/bin/tagger/train_german_tagger.py b/bin/tagger/train_german_tagger.py deleted file mode 100644 index 4927a6e9a..000000000 --- a/bin/tagger/train_german_tagger.py +++ /dev/null @@ -1,160 +0,0 @@ -#!/usr/bin/env python -from __future__ import division -from __future__ import unicode_literals - -import os -from os import path -import shutil -import io -import random -import time -import gzip -import ujson - -import plac -import cProfile -import pstats - -import spacy.util -from spacy.de import German -from spacy.gold import GoldParse -from spacy.tagger import Tagger -from spacy.scorer import PRFScore - -from spacy.tagger import P2_orth, P2_cluster, P2_shape, P2_prefix, P2_suffix, P2_pos, P2_lemma, P2_flags -from spacy.tagger import P1_orth, P1_cluster, P1_shape, P1_prefix, P1_suffix, P1_pos, P1_lemma, P1_flags -from spacy.tagger import W_orth, W_cluster, W_shape, W_prefix, W_suffix, W_pos, W_lemma, W_flags -from spacy.tagger import N1_orth, N1_cluster, N1_shape, N1_prefix, N1_suffix, N1_pos, N1_lemma, N1_flags -from spacy.tagger import N2_orth, N2_cluster, N2_shape, N2_prefix, N2_suffix, N2_pos, N2_lemma, N2_flags, N_CONTEXT_FIELDS - - -def default_templates(): - return spacy.tagger.Tagger.default_templates() - -def default_templates_without_clusters(): - return ( - (W_orth,), - (P1_lemma, P1_pos), - (P2_lemma, P2_pos), - (N1_orth,), - (N2_orth,), - - (W_suffix,), - (W_prefix,), - - (P1_pos,), - (P2_pos,), - (P1_pos, P2_pos), - 
(P1_pos, W_orth), - (P1_suffix,), - (N1_suffix,), - - (W_shape,), - - (W_flags,), - (N1_flags,), - (N2_flags,), - (P1_flags,), - (P2_flags,), - ) - - -def make_tagger(vocab, templates): - model = spacy.tagger.TaggerModel(templates) - return spacy.tagger.Tagger(vocab,model) - - -def read_conll(file_): - def sentences(): - words, tags = [], [] - for line in file_: - line = line.strip() - if line: - word, tag = line.split('\t')[1::3][:2] # get column 1 and 4 (CoNLL09) - words.append(word) - tags.append(tag) - elif words: - yield words, tags - words, tags = [], [] - if words: - yield words, tags - return [ s for s in sentences() ] - - -def score_model(score, nlp, words, gold_tags): - tokens = nlp.tokenizer.tokens_from_list(words) - assert(len(tokens) == len(gold_tags)) - nlp.tagger(tokens) - - for token, gold_tag in zip(tokens,gold_tags): - score.score_set(set([token.tag_]),set([gold_tag])) - - -def train(Language, train_sents, dev_sents, model_dir, n_iter=15, seed=21): - # make shuffling deterministic - random.seed(seed) - - # set up directory for model - pos_model_dir = path.join(model_dir, 'pos') - if path.exists(pos_model_dir): - shutil.rmtree(pos_model_dir) - os.mkdir(pos_model_dir) - - nlp = Language(data_dir=model_dir, tagger=False, parser=False, entity=False) - nlp.tagger = make_tagger(nlp.vocab,default_templates()) - - print("Itn.\ttrain acc %\tdev acc %") - for itn in range(n_iter): - # train on train set - #train_acc = PRFScore() - correct, total = 0., 0. - for words, gold_tags in train_sents: - tokens = nlp.tokenizer.tokens_from_list(words) - correct += nlp.tagger.train(tokens, gold_tags) - total += len(words) - train_acc = correct/total - - # test on dev set - dev_acc = PRFScore() - for words, gold_tags in dev_sents: - score_model(dev_acc, nlp, words, gold_tags) - - random.shuffle(train_sents) - print('%d:\t%6.2f\t%6.2f' % (itn, 100*train_acc, 100*dev_acc.precision)) - - - print('end training') - nlp.end_training(model_dir) - print('done') - - -@plac.annotations( - train_loc=("Location of CoNLL 09 formatted training file"), - dev_loc=("Location of CoNLL 09 formatted development file"), - model_dir=("Location of output model directory"), - eval_only=("Skip training, and only evaluate", "flag", "e", bool), - n_iter=("Number of training iterations", "option", "i", int), -) -def main(train_loc, dev_loc, model_dir, eval_only=False, n_iter=15): - # training - if not eval_only: - with io.open(train_loc, 'r', encoding='utf8') as trainfile_, \ - io.open(dev_loc, 'r', encoding='utf8') as devfile_: - train_sents = read_conll(trainfile_) - dev_sents = read_conll(devfile_) - train(German, train_sents, dev_sents, model_dir, n_iter=n_iter) - - # testing - with io.open(dev_loc, 'r', encoding='utf8') as file_: - dev_sents = read_conll(file_) - nlp = German(data_dir=model_dir) - - dev_acc = PRFScore() - for words, gold_tags in dev_sents: - score_model(dev_acc, nlp, words, gold_tags) - - print('POS: %6.2f %%' % (100*dev_acc.precision)) - - -if __name__ == '__main__': - plac.call(main) diff --git a/setup.py b/setup.py index 9fb4970da..37bfd0495 100755 --- a/setup.py +++ b/setup.py @@ -24,7 +24,6 @@ MOD_NAMES = [ 'spacy.vocab', 'spacy.attrs', 'spacy.morphology', - 'spacy.tagger', 'spacy.pipeline', 'spacy.syntax.stateclass', 'spacy.syntax._state', diff --git a/spacy/__init__.py b/spacy/__init__.py index ba2479106..9acc566ad 100644 --- a/spacy/__init__.py +++ b/spacy/__init__.py @@ -3,8 +3,6 @@ from __future__ import unicode_literals from .cli.info import info as cli_info from .glossary import explain 
-from .deprecated import resolve_load_name -#from .about import __version__ from .about import __version__ from . import util diff --git a/spacy/__main__.py b/spacy/__main__.py index 99d6b116c..48460c9e3 100644 --- a/spacy/__main__.py +++ b/spacy/__main__.py @@ -1,7 +1,7 @@ # coding: utf8 from __future__ import print_function # NB! This breaks in plac on Python 2!! -#from __future__ import unicode_literals +# from __future__ import unicode_literals if __name__ == '__main__': import plac diff --git a/spacy/_ml.py b/spacy/_ml.py index b60851fda..de89e04d0 100644 --- a/spacy/_ml.py +++ b/spacy/_ml.py @@ -1,49 +1,42 @@ -import ujson -from thinc.v2v import Model, Maxout, Softmax, Affine, ReLu, SELU +# coding: utf8 +from __future__ import unicode_literals + +import numpy +from thinc.v2v import Model, Maxout, Softmax, Affine, ReLu from thinc.i2v import HashEmbed, StaticVectors from thinc.t2t import ExtractWindow, ParametricAttention -from thinc.t2v import Pooling, max_pool, mean_pool, sum_pool +from thinc.t2v import Pooling, sum_pool from thinc.misc import Residual -from thinc.misc import BatchNorm as BN from thinc.misc import LayerNorm as LN - from thinc.api import add, layerize, chain, clone, concatenate, with_flatten -from thinc.api import FeatureExtracter, with_getitem -from thinc.api import uniqued, wrap, flatten_add_lengths, noop - +from thinc.api import FeatureExtracter, with_getitem, flatten_add_lengths +from thinc.api import uniqued, wrap, noop from thinc.linear.linear import LinearModel from thinc.neural.ops import NumpyOps, CupyOps from thinc.neural.util import get_array_module, copy_array from thinc.neural._lsuv import svd_orthonormal -import random -import cytoolz - from thinc import describe from thinc.describe import Dimension, Synapses, Biases, Gradient from thinc.neural._classes.affine import _set_dimensions_if_needed import thinc.extra.load_nlp from thinc.neural._lsuv import svd_orthonormal -from .attrs import ID, ORTH, LOWER, NORM, PREFIX, SUFFIX, SHAPE, TAG, DEP, CLUSTER -from .tokens.doc import Doc +from .attrs import ID, ORTH, LOWER, NORM, PREFIX, SUFFIX, SHAPE from . import util -import numpy -import io - -# TODO: Unset this once we don't want to support models previous models. -import thinc.neural._classes.layernorm -thinc.neural._classes.layernorm.set_compat_six_eight(False) VECTORS_KEY = 'spacy_pretrained_vectors' + @layerize def _flatten_add_lengths(seqs, pad=0, drop=0.): ops = Model.ops lengths = ops.asarray([len(seq) for seq in seqs], dtype='i') + def finish_update(d_X, sgd=None): return ops.unflatten(d_X, lengths, pad=pad) + X = ops.flatten(seqs, pad=pad) return (X, lengths), finish_update @@ -57,33 +50,14 @@ def _logistic(X, drop=0.): X = xp.minimum(X, 10., X) X = xp.maximum(X, -10., X) Y = 1. / (1. + xp.exp(-X)) + def logistic_bwd(dY, sgd=None): dX = dY * (Y * (1-Y)) return dX + return Y, logistic_bwd -@layerize -def add_tuples(X, drop=0.): - """Give inputs of sequence pairs, where each sequence is (vals, length), - sum the values, returning a single sequence. - - If input is: - ((vals1, length), (vals2, length) - Output is: - (vals1+vals2, length) - - vals are a single tensor for the whole batch. 
- """ - (vals1, length1), (vals2, length2) = X - assert length1 == length2 - - def add_tuples_bwd(dY, sgd=None): - return (dY, dY) - - return (vals1+vals2, length), add_tuples_bwd - - def _zero_init(model): def _zero_init_impl(self, X, y): self.W.fill(0) @@ -111,13 +85,11 @@ def _preprocess_doc(docs, drop=0.): nO=Dimension("Output size"), nP=Dimension("Maxout pieces"), W=Synapses("Weights matrix", - lambda obj: (obj.nF, obj.nO, obj.nP, obj.nI) if obj.nP >= 2 - else (obj.nF, obj.nO, obj.nI)), + lambda obj: (obj.nF, obj.nO, obj.nP, obj.nI)), b=Biases("Bias vector", - lambda obj: (obj.nO, obj.nP) if obj.nP >= 2 else (obj.nO,)), + lambda obj: (obj.nO, obj.nP)), d_W=Gradient("W"), - d_b=Gradient("b") -) + d_b=Gradient("b")) class PrecomputableAffine(Model): def __init__(self, nO=None, nI=None, nF=None, nP=None, **kwargs): Model.__init__(self, **kwargs) @@ -203,89 +175,6 @@ class PrecomputableAffine(Model): break -# Thinc's Embed class is a bit broken atm, so drop this here. -from thinc import describe -from thinc.neural._classes.embed import _uniform_init - - -@describe.attributes( - nV=describe.Dimension("Number of vectors"), - nO=describe.Dimension("Size of output"), - vectors=describe.Weights("Embedding table", - lambda obj: (obj.nV, obj.nO), - _uniform_init(-0.1, 0.1) - ), - d_vectors=describe.Gradient("vectors") -) -class Embed(Model): - name = 'embed' - - def __init__(self, nO, nV=None, **kwargs): - if nV is not None: - nV += 1 - Model.__init__(self, **kwargs) - if 'name' in kwargs: - self.name = kwargs['name'] - self.column = kwargs.get('column', 0) - self.nO = nO - self.nV = nV - - def predict(self, ids): - if ids.ndim == 2: - ids = ids[:, self.column] - return self.ops.xp.ascontiguousarray(self.vectors[ids], dtype='f') - - def begin_update(self, ids, drop=0.): - if ids.ndim == 2: - ids = ids[:, self.column] - vectors = self.ops.xp.ascontiguousarray(self.vectors[ids], dtype='f') - def backprop_embed(d_vectors, sgd=None): - n_vectors = d_vectors.shape[0] - self.ops.scatter_add(self.d_vectors, ids, d_vectors) - if sgd is not None: - sgd(self._mem.weights, self._mem.gradient, key=self.id) - return None - return vectors, backprop_embed - - -def HistoryFeatures(nr_class, hist_size=8, nr_dim=8): - '''Wrap a model, adding features representing action history.''' - if hist_size == 0: - return layerize(noop()) - embed_tables = [Embed(nr_dim, nr_class, column=i, name='embed%d') - for i in range(hist_size)] - embed = chain(concatenate(*embed_tables), - LN(Maxout(hist_size*nr_dim, hist_size*nr_dim))) - ops = embed.ops - def add_history_fwd(vectors_hists, drop=0.): - vectors, hist_ids = vectors_hists - hist_feats, bp_hists = embed.begin_update(hist_ids, drop=drop) - outputs = ops.xp.hstack((vectors, hist_feats)) - - def add_history_bwd(d_outputs, sgd=None): - d_vectors = d_outputs[:, :vectors.shape[1]] - d_hists = d_outputs[:, vectors.shape[1]:] - bp_hists(d_hists, sgd=sgd) - return embed.ops.xp.ascontiguousarray(d_vectors) - return outputs, add_history_bwd - return wrap(add_history_fwd, embed) - - -def drop_layer(layer, factor=2.): - def drop_layer_fwd(X, drop=0.): - if drop <= 0.: - return layer.begin_update(X, drop=drop) - else: - coinflip = layer.ops.xp.random.random() - if (coinflip / factor) >= drop: - return layer.begin_update(X, drop=drop) - else: - return X, lambda dX, sgd=None: dX - - model = wrap(drop_layer_fwd, layer) - model.predict = layer - return model - def link_vectors_to_models(vocab): vectors = vocab.vectors ops = Model.ops @@ -299,16 +188,21 @@ def link_vectors_to_models(vocab): 
# (unideal, I know) thinc.extra.load_nlp.VECTORS[(ops.device, VECTORS_KEY)] = data + def Tok2Vec(width, embed_size, **kwargs): pretrained_dims = kwargs.get('pretrained_dims', 0) cnn_maxout_pieces = kwargs.get('cnn_maxout_pieces', 2) cols = [ID, NORM, PREFIX, SUFFIX, SHAPE, ORTH] - with Model.define_operators({'>>': chain, '|': concatenate, '**': clone, '+': add, - '*': reapply}): - norm = HashEmbed(width, embed_size, column=cols.index(NORM), name='embed_norm') - prefix = HashEmbed(width, embed_size//2, column=cols.index(PREFIX), name='embed_prefix') - suffix = HashEmbed(width, embed_size//2, column=cols.index(SUFFIX), name='embed_suffix') - shape = HashEmbed(width, embed_size//2, column=cols.index(SHAPE), name='embed_shape') + with Model.define_operators({'>>': chain, '|': concatenate, '**': clone, + '+': add, '*': reapply}): + norm = HashEmbed(width, embed_size, column=cols.index(NORM), + name='embed_norm') + prefix = HashEmbed(width, embed_size//2, column=cols.index(PREFIX), + name='embed_prefix') + suffix = HashEmbed(width, embed_size//2, column=cols.index(SUFFIX), + name='embed_suffix') + shape = HashEmbed(width, embed_size//2, column=cols.index(SHAPE), + name='embed_shape') if pretrained_dims is not None and pretrained_dims >= 1: glove = StaticVectors(VECTORS_KEY, width, column=cols.index(ID)) @@ -320,7 +214,6 @@ def Tok2Vec(width, embed_size, **kwargs): (norm | prefix | suffix | shape) >> LN(Maxout(width, width*4, pieces=3)), column=5) - convolution = Residual( ExtractWindow(nW=1) >> LN(Maxout(width, width*3, pieces=cnn_maxout_pieces)) @@ -344,6 +237,7 @@ def reapply(layer, n_times): Y, backprop = layer.begin_update(X, drop=drop) X = Y backprops.append(backprop) + def reapply_bwd(dY, sgd=None): dX = None for backprop in reversed(backprops): @@ -353,6 +247,7 @@ def reapply(layer, n_times): else: dX += dY return dX + return Y, reapply_bwd return wrap(reapply_fwd, layer) @@ -367,13 +262,14 @@ def _divide_array(X, size): parts = [] index = 0 while index < len(X): - parts.append(X[index : index + size]) + parts.append(X[index:index + size]) index += size return parts def get_col(idx): assert idx >= 0, idx + def forward(X, drop=0.): assert idx >= 0, idx if isinstance(X, numpy.ndarray): @@ -381,30 +277,28 @@ def get_col(idx): else: ops = CupyOps() output = ops.xp.ascontiguousarray(X[:, idx], dtype=X.dtype) + def backward(y, sgd=None): assert idx >= 0, idx dX = ops.allocate(X.shape) dX[:, idx] += y return dX + return output, backward + return layerize(forward) -def zero_init(model): - def _hook(self, X, y=None): - self.W.fill(0) - model.on_data_hooks.append(_hook) - return model - - def doc2feats(cols=None): if cols is None: cols = [ID, NORM, PREFIX, SUFFIX, SHAPE, ORTH] + def forward(docs, drop=0.): feats = [] for doc in docs: feats.append(doc.to_array(cols)) return feats, None + model = layerize(forward) model.cols = cols return model @@ -418,28 +312,14 @@ def print_shape(prefix): @layerize def get_token_vectors(tokens_attrs_vectors, drop=0.): - ops = Model.ops tokens, attrs, vectors = tokens_attrs_vectors + def backward(d_output, sgd=None): return (tokens, d_output) + return vectors, backward -@layerize -def flatten(seqs, drop=0.): - if isinstance(seqs[0], numpy.ndarray): - ops = NumpyOps() - elif hasattr(CupyOps.xp, 'ndarray') and isinstance(seqs[0], CupyOps.xp.ndarray): - ops = CupyOps() - else: - raise ValueError("Unable to flatten sequence of type %s" % type(seqs[0])) - lengths = [len(seq) for seq in seqs] - def finish_update(d_X, sgd=None): - return ops.unflatten(d_X, lengths) - X = 
ops.xp.vstack(seqs) - return X, finish_update - - @layerize def logistic(X, drop=0.): xp = get_array_module(X) @@ -449,9 +329,11 @@ def logistic(X, drop=0.): X = xp.minimum(X, 10., X) X = xp.maximum(X, -10., X) Y = 1. / (1. + xp.exp(-X)) + def logistic_bwd(dY, sgd=None): dX = dY * (Y * (1-Y)) return dX + return Y, logistic_bwd @@ -461,6 +343,7 @@ def zero_init(model): model.on_data_hooks.append(_zero_init_impl) return model + @layerize def preprocess_doc(docs, drop=0.): keys = [doc.to_array([LOWER]) for doc in docs] @@ -501,8 +384,6 @@ def build_tagger_model(nr_class, **cfg): @layerize def SpacyVectors(docs, drop=0.): - xp = get_array_module(docs[0].vocab.vectors.data) - width = docs[0].vocab.vectors.data.shape[1] batch = [] for doc in docs: indices = numpy.zeros((len(doc),), dtype='i') @@ -525,9 +406,7 @@ def build_text_classifier(nr_class, width=64, **cfg): model = ( SpacyVectors >> flatten_add_lengths - >> with_getitem(0, - Affine(width, pretrained_dims) - ) + >> with_getitem(0, Affine(width, pretrained_dims)) >> ParametricAttention(width) >> Pooling(sum_pool) >> Residual(ReLu(width, width)) ** 2 @@ -536,7 +415,6 @@ def build_text_classifier(nr_class, width=64, **cfg): ) return model - lower = HashEmbed(width, nr_vector, column=1) prefix = HashEmbed(width//2, nr_vector, column=2) suffix = HashEmbed(width//2, nr_vector, column=3) @@ -594,33 +472,40 @@ def build_text_classifier(nr_class, width=64, **cfg): model.lsuv = False return model + @layerize def flatten(seqs, drop=0.): ops = Model.ops lengths = ops.asarray([len(seq) for seq in seqs], dtype='i') + def finish_update(d_X, sgd=None): return ops.unflatten(d_X, lengths, pad=0) + X = ops.flatten(seqs, pad=0) return X, finish_update -def concatenate_lists(*layers, **kwargs): # pragma: no cover - '''Compose two or more models `f`, `g`, etc, such that their outputs are +def concatenate_lists(*layers, **kwargs): # pragma: no cover + """Compose two or more models `f`, `g`, etc, such that their outputs are concatenated, i.e. `concatenate(f, g)(x)` computes `hstack(f(x), g(x))` - ''' + """ if not layers: return noop() drop_factor = kwargs.get('drop_factor', 1.0) ops = layers[0].ops layers = [chain(layer, flatten) for layer in layers] concat = concatenate(*layers) + def concatenate_lists_fwd(Xs, drop=0.): drop *= drop_factor lengths = ops.asarray([len(X) for X in Xs], dtype='i') flat_y, bp_flat_y = concat.begin_update(Xs, drop=drop) ys = ops.unflatten(flat_y, lengths) + def concatenate_lists_bwd(d_ys, sgd=None): return bp_flat_y(ops.flatten(d_ys), sgd=sgd) + return ys, concatenate_lists_bwd + model = wrap(concatenate_lists_fwd, concat) return model diff --git a/spacy/attrs.pyx b/spacy/attrs.pyx index 8efd9e189..8113ffebe 100644 --- a/spacy/attrs.pyx +++ b/spacy/attrs.pyx @@ -101,17 +101,12 @@ def intify_attrs(stringy_attrs, strings_map=None, _do_deprecated=False): """ Normalize a dictionary of attributes, converting them to ints. - Arguments: - stringy_attrs (dict): - Dictionary keyed by attribute string names. Values can be ints or strings. - - strings_map (StringStore): - Defaults to None. If provided, encodes string values into ints. - - Returns: - inty_attrs (dict): - Attributes dictionary with keys and optionally values converted to - ints. + stringy_attrs (dict): Dictionary keyed by attribute string names. Values + can be ints or strings. + strings_map (StringStore): Defaults to None. If provided, encodes string + values into ints. + RETURNS (dict): Attributes dictionary with keys and optionally values + converted to ints. 
""" inty_attrs = {} if _do_deprecated: diff --git a/spacy/cli/convert.py b/spacy/cli/convert.py index d9a812a15..ad17844a1 100644 --- a/spacy/cli/convert.py +++ b/spacy/cli/convert.py @@ -7,10 +7,9 @@ from pathlib import Path from .converters import conllu2json, iob2json, conll_ner2json from ..util import prints -# Converters are matched by file extension. To add a converter, add a new entry -# to this dict with the file extension mapped to the converter function imported -# from /converters. - +# Converters are matched by file extension. To add a converter, add a new +# entry to this dict with the file extension mapped to the converter function +# imported from /converters. CONVERTERS = { 'conllu': conllu2json, 'conll': conllu2json, @@ -24,8 +23,7 @@ CONVERTERS = { output_dir=("output directory for converted file", "positional", None, str), n_sents=("Number of sentences per doc", "option", "n", int), converter=("Name of converter (auto, iob, conllu or ner)", "option", "c", str), - morphology=("Enable appending morphology to tags", "flag", "m", bool) -) + morphology=("Enable appending morphology to tags", "flag", "m", bool)) def convert(cmd, input_file, output_dir, n_sents=1, morphology=False, converter='auto'): """ @@ -40,7 +38,7 @@ def convert(cmd, input_file, output_dir, n_sents=1, morphology=False, prints(output_path, title="Output directory not found", exits=1) if converter == 'auto': converter = input_path.suffix[1:] - if not converter in CONVERTERS: + if converter not in CONVERTERS: prints("Can't find converter for %s" % converter, title="Unknown format", exits=1) func = CONVERTERS[converter] diff --git a/spacy/cli/converters/conll_ner2json.py b/spacy/cli/converters/conll_ner2json.py index e3bd82e7e..fb2979652 100644 --- a/spacy/cli/converters/conll_ner2json.py +++ b/spacy/cli/converters/conll_ner2json.py @@ -8,7 +8,8 @@ from ...gold import iob_to_biluo def conll_ner2json(input_path, output_path, n_sents=10, use_morphology=False): """ - Convert files in the CoNLL-2003 NER format into JSON format for use with train cli. + Convert files in the CoNLL-2003 NER format into JSON format for use with + train cli. """ docs = read_conll_ner(input_path) diff --git a/spacy/cli/download.py b/spacy/cli/download.py index 28ae07865..0d3f11153 100644 --- a/spacy/cli/download.py +++ b/spacy/cli/download.py @@ -13,10 +13,9 @@ from .. import about @plac.annotations( - model=("model to download (shortcut or model name)", "positional", None, str), + model=("model to download, shortcut or name)", "positional", None, str), direct=("force direct download. Needs model name with version and won't " - "perform compatibility check", "flag", "d", bool) -) + "perform compatibility check", "flag", "d", bool)) def download(cmd, model, direct=False): """ Download compatible model from default download path using pip. 
Model @@ -30,21 +29,25 @@ def download(cmd, model, direct=False): model_name = shortcuts.get(model, model) compatibility = get_compatibility() version = get_version(model_name, compatibility) - dl = download_model('{m}-{v}/{m}-{v}.tar.gz'.format(m=model_name, v=version)) + dl = download_model('{m}-{v}/{m}-{v}.tar.gz'.format(m=model_name, + v=version)) if dl == 0: try: # Get package path here because link uses - # pip.get_installed_distributions() to check if model is a package, - # which fails if model was just installed via subprocess + # pip.get_installed_distributions() to check if model is a + # package, which fails if model was just installed via + # subprocess package_path = get_package_path(model_name) - link(None, model_name, model, force=True, model_path=package_path) + link(None, model_name, model, force=True, + model_path=package_path) except: - # Dirty, but since spacy.download and the auto-linking is mostly - # a convenience wrapper, it's best to show a success message and - # loading instructions, even if linking fails. - prints("Creating a shortcut link for 'en' didn't work (maybe you " - "don't have admin permissions?), but you can still load " - "the model via its full package name:", + # Dirty, but since spacy.download and the auto-linking is + # mostly a convenience wrapper, it's best to show a success + # message and loading instructions, even if linking fails. + prints( + "Creating a shortcut link for 'en' didn't work (maybe " + "you don't have admin permissions?), but you can still " + "load the model via its full package name:", "nlp = spacy.load('%s')" % model_name, title="Download successful") @@ -52,9 +55,10 @@ def download(cmd, model, direct=False): def get_json(url, desc): r = requests.get(url) if r.status_code != 200: - prints("Couldn't fetch %s. Please find a model for your spaCy installation " - "(v%s), and download it manually." % (desc, about.__version__), - about.__docs_models__, title="Server error (%d)" % r.status_code, exits=1) + msg = ("Couldn't fetch %s. Please find a model for your spaCy " + "installation (v%s), and download it manually.") + prints(msg % (desc, about.__version__), about.__docs_models__, + title="Server error (%d)" % r.status_code, exits=1) return r.json() @@ -71,13 +75,13 @@ def get_compatibility(): def get_version(model, comp): if model not in comp: version = about.__version__ - prints("No compatible model found for '%s' (spaCy v%s)." % (model, version), - title="Compatibility error", exits=1) + msg = "No compatible model found for '%s' (spaCy v%s)." 
+ prints(msg % (model, version), title="Compatibility error", exits=1) return comp[model][0] def download_model(filename): download_url = about.__download_url__ + '/' + filename - return subprocess.call([sys.executable, '-m', - 'pip', 'install', '--no-cache-dir', download_url], - env=os.environ.copy()) + return subprocess.call( + [sys.executable, '-m', 'pip', 'install', '--no-cache-dir', + download_url], env=os.environ.copy()) diff --git a/spacy/cli/evaluate.py b/spacy/cli/evaluate.py index 29e30b7d2..d4d54d8aa 100644 --- a/spacy/cli/evaluate.py +++ b/spacy/cli/evaluate.py @@ -2,27 +2,15 @@ from __future__ import unicode_literals, division, print_function import plac -import json -from collections import defaultdict -import cytoolz -from pathlib import Path -import dill -import tqdm -from thinc.neural._classes.model import Model -from thinc.neural.optimizers import linear_decay from timeit import default_timer as timer import random import numpy.random -from ..tokens.doc import Doc -from ..scorer import Scorer -from ..gold import GoldParse, merge_sents -from ..gold import GoldCorpus, minibatch +from ..gold import GoldCorpus from ..util import prints from .. import util -from .. import about from .. import displacy -from ..compat import json_dumps + random.seed(0) numpy.random.seed(0) @@ -30,17 +18,18 @@ numpy.random.seed(0) @plac.annotations( model=("Model name or path", "positional", None, str), - data_path=("Location of JSON-formatted evaluation data", "positional", None, str), + data_path=("Location of JSON-formatted evaluation data", "positional", + None, str), gold_preproc=("Use gold preprocessing", "flag", "G", bool), gpu_id=("Use GPU", "option", "g", int), - displacy_path=("Directory to output rendered parses as HTML", "option", "dp", str), - displacy_limit=("Limit of parses to render as HTML", "option", "dl", int) -) + displacy_path=("Directory to output rendered parses as HTML", "option", + "dp", str), + displacy_limit=("Limit of parses to render as HTML", "option", "dl", int)) def evaluate(cmd, model, data_path, gpu_id=-1, gold_preproc=False, displacy_path=None, displacy_limit=25): """ - Evaluate a model. To render a sample of parses in a HTML file, set an output - directory as the displacy_path argument. + Evaluate a model. To render a sample of parses in a HTML file, set an + output directory as the displacy_path argument. 
""" if gpu_id >= 0: util.use_gpu(gpu_id) @@ -50,7 +39,8 @@ def evaluate(cmd, model, data_path, gpu_id=-1, gold_preproc=False, if not data_path.exists(): prints(data_path, title="Evaluation data not found", exits=1) if displacy_path and not displacy_path.exists(): - prints(displacy_path, title="Visualization output directory not found", exits=1) + prints(displacy_path, title="Visualization output directory not found", + exits=1) corpus = GoldCorpus(data_path, data_path) nlp = util.load_model(model) dev_docs = list(corpus.dev_docs(nlp, gold_preproc=gold_preproc)) @@ -64,12 +54,14 @@ def evaluate(cmd, model, data_path, gpu_id=-1, gold_preproc=False, docs, golds = zip(*dev_docs) render_deps = 'parser' in nlp.meta.get('pipeline', []) render_ents = 'ner' in nlp.meta.get('pipeline', []) - render_parses(docs, displacy_path, model_name=model, limit=displacy_limit, - deps=render_deps, ents=render_ents) - prints(displacy_path, title="Generated %s parses as HTML" % displacy_limit) + render_parses(docs, displacy_path, model_name=model, + limit=displacy_limit, deps=render_deps, ents=render_ents) + msg = "Generated %s parses as HTML" % displacy_limit + prints(displacy_path, title=msg) -def render_parses(docs, output_path, model_name='', limit=250, deps=True, ents=True): +def render_parses(docs, output_path, model_name='', limit=250, deps=True, + ents=True): docs[0].user_data['title'] = model_name if ents: with (output_path / 'entities.html').open('w') as file_: @@ -77,7 +69,8 @@ def render_parses(docs, output_path, model_name='', limit=250, deps=True, ents=T file_.write(html) if deps: with (output_path / 'parses.html').open('w') as file_: - html = displacy.render(docs[:limit], style='dep', page=True, options={'compact': True}) + html = displacy.render(docs[:limit], style='dep', page=True, + options={'compact': True}) file_.write(html) diff --git a/spacy/cli/info.py b/spacy/cli/info.py index 5d45b271c..3636494fb 100644 --- a/spacy/cli/info.py +++ b/spacy/cli/info.py @@ -12,8 +12,7 @@ from .. import util @plac.annotations( model=("optional: shortcut link of model", "positional", None, str), - markdown=("generate Markdown for GitHub issues", "flag", "md", str) -) + markdown=("generate Markdown for GitHub issues", "flag", "md", str)) def info(cmd, model=None, markdown=False): """Print info about spaCy installation. If a model shortcut link is speficied as an argument, print model information. Flag --markdown diff --git a/spacy/cli/link.py b/spacy/cli/link.py index 5b333dae5..cfbc97e3e 100644 --- a/spacy/cli/link.py +++ b/spacy/cli/link.py @@ -12,8 +12,7 @@ from .. import util @plac.annotations( origin=("package name or local path to model", "positional", None, str), link_name=("name of shortuct link to create", "positional", None, str), - force=("force overwriting of existing link", "flag", "f", bool) -) + force=("force overwriting of existing link", "flag", "f", bool)) def link(cmd, origin, link_name, force=False, model_path=None): """ Create a symlink for models within the spacy/data directory. Accepts @@ -46,8 +45,9 @@ def link(cmd, origin, link_name, force=False, model_path=None): # This is quite dirty, but just making sure other errors are caught. prints("Creating a symlink in spacy/data failed. Make sure you have " "the required permissions and try re-running the command as " - "admin, or use a virtualenv. You can still import the model as a " - "module and call its load() method, or create the symlink manually.", + "admin, or use a virtualenv. 
You can still import the model as " + "a module and call its load() method, or create the symlink " + "manually.", "%s --> %s" % (path2str(model_path), path2str(link_path)), title="Error: Couldn't link model to '%s'" % link_name) raise diff --git a/spacy/cli/package.py b/spacy/cli/package.py index 6b0811459..d1984fe65 100644 --- a/spacy/cli/package.py +++ b/spacy/cli/package.py @@ -16,10 +16,12 @@ from .. import about input_dir=("directory with model data", "positional", None, str), output_dir=("output parent directory", "positional", None, str), meta_path=("path to meta.json", "option", "m", str), - create_meta=("create meta.json, even if one exists in directory", "flag", "c", bool), - force=("force overwriting of existing folder in output directory", "flag", "f", bool) -) -def package(cmd, input_dir, output_dir, meta_path=None, create_meta=False, force=False): + create_meta=("create meta.json, even if one exists in directory", "flag", + "c", bool), + force=("force overwriting of existing folder in output directory", "flag", + "f", bool)) +def package(cmd, input_dir, output_dir, meta_path=None, create_meta=False, + force=False): """ Generate Python package for model data, including meta and required installation files. A new directory will be created in the specified @@ -52,13 +54,15 @@ def package(cmd, input_dir, output_dir, meta_path=None, create_meta=False, force package_path = main_path / model_name create_dirs(package_path, force) - shutil.copytree(path2str(input_path), path2str(package_path / model_name_v)) + shutil.copytree(path2str(input_path), + path2str(package_path / model_name_v)) create_file(main_path / 'meta.json', json_dumps(meta)) create_file(main_path / 'setup.py', template_setup) create_file(main_path / 'MANIFEST.in', template_manifest) create_file(package_path / '__init__.py', template_init) - prints(main_path, "To build the package, run `python setup.py sdist` in this " - "directory.", title="Successfully created package '%s'" % model_name_v) + prints(main_path, "To build the package, run `python setup.py sdist` in " + "this directory.", + title="Successfully created package '%s'" % model_name_v) def create_dirs(package_path, force): @@ -66,9 +70,10 @@ def create_dirs(package_path, force): if force: shutil.rmtree(path2str(package_path)) else: - prints(package_path, "Please delete the directory and try again, or " - "use the --force flag to overwrite existing directories.", - title="Package directory already exists", exits=1) + prints(package_path, "Please delete the directory and try again, " + "or use the --force flag to overwrite existing " + "directories.", title="Package directory already exists", + exits=1) Path.mkdir(package_path, parents=True) @@ -82,7 +87,8 @@ def generate_meta(model_path): settings = [('lang', 'Model language', 'en'), ('name', 'Model name', 'model'), ('version', 'Model version', '0.0.0'), - ('spacy_version', 'Required spaCy version', '>=%s,<3.0.0' % about.__version__), + ('spacy_version', 'Required spaCy version', + '>=%s,<3.0.0' % about.__version__), ('description', 'Model description', False), ('author', 'Author', False), ('email', 'Author email', False), diff --git a/spacy/cli/profile.py b/spacy/cli/profile.py index db6fc5b41..a394989d0 100644 --- a/spacy/cli/profile.py +++ b/spacy/cli/profile.py @@ -27,15 +27,15 @@ def read_inputs(loc): @plac.annotations( lang=("model/language", "positional", None, str), - inputs=("Location of input file", "positional", None, read_inputs) -) + inputs=("Location of input file", "positional", None, 
read_inputs)) def profile(cmd, lang, inputs=None): """ Profile a spaCy pipeline, to find out which functions take the most time. """ - nlp = spacy.load(lang) + nlp = spacy.load(lang) texts = list(cytoolz.take(10000, inputs)) - cProfile.runctx("parse_texts(nlp, texts)", globals(), locals(), "Profile.prof") + cProfile.runctx("parse_texts(nlp, texts)", globals(), locals(), + "Profile.prof") s = pstats.Stats("Profile.prof") s.strip_dirs().sort_stats("time").print_stats() diff --git a/spacy/cli/train.py b/spacy/cli/train.py index da398751c..fb96e6c05 100644 --- a/spacy/cli/train.py +++ b/spacy/cli/train.py @@ -2,21 +2,14 @@ from __future__ import unicode_literals, division, print_function import plac -import json -from collections import defaultdict -import cytoolz from pathlib import Path import dill import tqdm from thinc.neural._classes.model import Model -from thinc.neural.optimizers import linear_decay from timeit import default_timer as timer import random import numpy.random -from ..tokens.doc import Doc -from ..scorer import Scorer -from ..gold import GoldParse, merge_sents from ..gold import GoldCorpus, minibatch from ..util import prints from .. import util @@ -31,8 +24,10 @@ numpy.random.seed(0) @plac.annotations( lang=("model language", "positional", None, str), output_dir=("output directory to store model in", "positional", None, str), - train_data=("location of JSON-formatted training data", "positional", None, str), - dev_data=("location of JSON-formatted development data (optional)", "positional", None, str), + train_data=("location of JSON-formatted training data", "positional", + None, str), + dev_data=("location of JSON-formatted development data (optional)", + "positional", None, str), n_iter=("number of iterations", "option", "n", int), n_sents=("number of sentences", "option", "ns", int), use_gpu=("Use GPU", "option", "g", int), @@ -42,11 +37,12 @@ numpy.random.seed(0) no_entities=("Don't train NER", "flag", "N", bool), gold_preproc=("Use gold preprocessing", "flag", "G", bool), version=("Model version", "option", "V", str), - meta_path=("Optional path to meta.json. All relevant properties will be overwritten.", "option", "m", Path) -) + meta_path=("Optional path to meta.json. All relevant properties will be " + "overwritten.", "option", "m", Path)) def train(cmd, lang, output_dir, train_data, dev_data, n_iter=30, n_sents=0, - use_gpu=-1, vectors=None, no_tagger=False, no_parser=False, no_entities=False, - gold_preproc=False, version="0.0.0", meta_path=None): + use_gpu=-1, vectors=None, no_tagger=False, no_parser=False, + no_entities=False, gold_preproc=False, version="0.0.0", + meta_path=None): """ Train a model. Expects data in spaCy's JSON format. """ @@ -72,9 +68,12 @@ def train(cmd, lang, output_dir, train_data, dev_data, n_iter=30, n_sents=0, meta.setdefault('name', 'unnamed') pipeline = ['tagger', 'parser', 'ner'] - if no_tagger and 'tagger' in pipeline: pipeline.remove('tagger') - if no_parser and 'parser' in pipeline: pipeline.remove('parser') - if no_entities and 'ner' in pipeline: pipeline.remove('ner') + if no_tagger and 'tagger' in pipeline: + pipeline.remove('tagger') + if no_parser and 'parser' in pipeline: + pipeline.remove('parser') + if no_entities and 'ner' in pipeline: + pipeline.remove('ner') # Take dropout and batch size as generators of values -- dropout # starts high and decays sharply, to force the optimizer to explore. 
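The comment that closes the hunk above notes that dropout and batch size are consumed as generators rather than fixed numbers. A minimal sketch of that idea follows; the helper names and schedule values here are illustrative assumptions, not the ones the CLI actually uses.

    def decaying(start, stop, decay):
        # Yield values that fall from `start` towards `stop`, never below it.
        value = start
        while True:
            yield max(value, stop)
            value -= decay

    def compounding(start, stop, compound):
        # Yield values that grow geometrically from `start`, capped at `stop`.
        value = start
        while True:
            yield min(value, stop)
            value *= compound

    dropout_rates = decaying(0.4, 0.2, 1e-4)   # starts high, decays sharply
    batch_sizes = compounding(1., 16., 1.001)  # starts small, grows each step
    dropout = next(dropout_rates)
    batch_size = int(next(batch_sizes))

The training loop can then draw a fresh value from each schedule per batch, which forces the optimizer to explore early on and improves throughput later.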
@@ -139,7 +138,7 @@ def train(cmd, lang, output_dir, train_data, dev_data, n_iter=30, n_sents=0, scorer = nlp_loaded.evaluate(dev_docs) end_time = timer() cpu_wps = nwords/(end_time-start_time) - acc_loc =(output_path / ('model%d' % i) / 'accuracy.json') + acc_loc = (output_path / ('model%d' % i) / 'accuracy.json') with acc_loc.open('w') as file_: file_.write(json_dumps(scorer.scores)) meta_loc = output_path / ('model%d' % i) / 'meta.json' @@ -157,7 +156,8 @@ def train(cmd, lang, output_dir, train_data, dev_data, n_iter=30, n_sents=0, with meta_loc.open('w') as file_: file_.write(json_dumps(meta)) util.set_env_log(True) - print_progress(i, losses, scorer.scores, cpu_wps=cpu_wps, gpu_wps=gpu_wps) + print_progress(i, losses, scorer.scores, cpu_wps=cpu_wps, + gpu_wps=gpu_wps) finally: print("Saving model...") try: diff --git a/spacy/cli/validate.py b/spacy/cli/validate.py index c1f992ed6..1c645a554 100644 --- a/spacy/cli/validate.py +++ b/spacy/cli/validate.py @@ -1,5 +1,5 @@ # coding: utf8 -from __future__ import unicode_literals +from __future__ import unicode_literals, print_function import requests import pkg_resources @@ -29,8 +29,10 @@ def validate(cmd): model_links = get_model_links(current_compat) model_pkgs = get_model_pkgs(current_compat, all_models) incompat_links = {l for l, d in model_links.items() if not d['compat']} - incompat_models = {d['name'] for _, d in model_pkgs.items() if not d['compat']} - incompat_models.update([d['name'] for _, d in model_links.items() if not d['compat']]) + incompat_models = {d['name'] for _, d in model_pkgs.items() + if not d['compat']} + incompat_models.update([d['name'] for _, d in model_links.items() + if not d['compat']]) na_models = [m for m in incompat_models if m not in current_compat] update_models = [m for m in incompat_models if m in current_compat] @@ -90,7 +92,6 @@ def get_model_pkgs(compat, all_models): def get_model_row(compat, name, data, type='package'): - tpl_row = ' {:<10}' + (' {:<20}' * 4) tpl_red = '\x1b[38;5;1m{}\x1b[0m' tpl_green = '\x1b[38;5;2m{}\x1b[0m' if data['compat']: @@ -110,7 +111,8 @@ def get_row(*args): def is_model_path(model_path): exclude = ['cache', 'pycache', '__pycache__'] name = model_path.parts[-1] - return model_path.is_dir() and name not in exclude and not name.startswith('.') + return (model_path.is_dir() and name not in exclude + and not name.startswith('.')) def is_compat(compat, name, version): @@ -118,6 +120,7 @@ def is_compat(compat, name, version): def reformat_version(version): + """Hack to reformat old versions ending on '-alpha' to match pip format.""" if version.endswith('-alpha'): return version.replace('-alpha', 'a0') return version.replace('-alpha', 'a') diff --git a/spacy/compat.py b/spacy/compat.py index 8dd3d6b03..7cd06e545 100644 --- a/spacy/compat.py +++ b/spacy/compat.py @@ -91,15 +91,15 @@ def symlink_to(orig, dest): def is_config(python2=None, python3=None, windows=None, linux=None, osx=None): - return ((python2 == None or python2 == is_python2) and - (python3 == None or python3 == is_python3) and - (windows == None or windows == is_windows) and - (linux == None or linux == is_linux) and - (osx == None or osx == is_osx)) + return ((python2 is None or python2 == is_python2) and + (python3 is None or python3 == is_python3) and + (windows is None or windows == is_windows) and + (linux is None or linux == is_linux) and + (osx is None or osx == is_osx)) def normalize_string_keys(old): - '''Given a dictionary, make sure keys are unicode strings, not bytes.''' + """Given a dictionary, make 
sure keys are unicode strings, not bytes.""" new = {} for key, value in old.items(): if isinstance(key, bytes_): diff --git a/spacy/deprecated.py b/spacy/deprecated.py index ad52bfe24..a1143474a 100644 --- a/spacy/deprecated.py +++ b/spacy/deprecated.py @@ -24,7 +24,7 @@ def depr_model_download(lang): def resolve_load_name(name, **overrides): - """Resolve model loading if deprecated path kwarg is specified in overrides. + """Resolve model loading if deprecated path kwarg in overrides. name (unicode): Name of model to load. **overrides: Overrides specified in spacy.load(). @@ -32,8 +32,9 @@ def resolve_load_name(name, **overrides): """ if overrides.get('path') not in (None, False, True): name = overrides.get('path') - prints("To load a model from a path, you can now use the first argument. " - "The model meta is used to load the required Language class.", - "OLD: spacy.load('en', path='/some/path')", "NEW: spacy.load('/some/path')", + prints("To load a model from a path, you can now use the first " + "argument. The model meta is used to load the Language class.", + "OLD: spacy.load('en', path='/some/path')", + "NEW: spacy.load('/some/path')", title="Warning: deprecated argument 'path'") return name diff --git a/spacy/displacy/__init__.py b/spacy/displacy/__init__.py index 7c479f94c..e160c31b6 100644 --- a/spacy/displacy/__init__.py +++ b/spacy/displacy/__init__.py @@ -12,7 +12,7 @@ IS_JUPYTER = is_in_jupyter() def render(docs, style='dep', page=False, minify=False, jupyter=IS_JUPYTER, - options={}, manual=False): + options={}, manual=False): """Render displaCy visualisation. docs (list or Doc): Document(s) to visualise. @@ -21,7 +21,7 @@ def render(docs, style='dep', page=False, minify=False, jupyter=IS_JUPYTER, minify (bool): Minify HTML markup. jupyter (bool): Experimental, use Jupyter's `display()` to output markup. options (dict): Visualiser-specific options, e.g. colors. - manual (bool): Don't parse `Doc` and instead, expect a dict or list of dicts. + manual (bool): Don't parse `Doc` and instead expect a dict/list of dicts. RETURNS (unicode): Rendered HTML markup. """ factories = {'dep': (DependencyRenderer, parse_deps), @@ -35,7 +35,7 @@ def render(docs, style='dep', page=False, minify=False, jupyter=IS_JUPYTER, parsed = [converter(doc, options) for doc in docs] if not manual else docs _html['parsed'] = renderer.render(parsed, page=page, minify=minify).strip() html = _html['parsed'] - if jupyter: # return HTML rendered by IPython display() + if jupyter: # return HTML rendered by IPython display() from IPython.core.display import display, HTML return display(HTML(html)) return html @@ -50,13 +50,15 @@ def serve(docs, style='dep', page=True, minify=False, options={}, manual=False, page (bool): Render markup as full HTML page. minify (bool): Minify HTML markup. options (dict): Visualiser-specific options, e.g. colors. - manual (bool): Don't parse `Doc` and instead, expect a dict or list of dicts. + manual (bool): Don't parse `Doc` and instead expect a dict/list of dicts. port (int): Port to serve visualisation. """ from wsgiref import simple_server - render(docs, style=style, page=page, minify=minify, options=options, manual=manual) + render(docs, style=style, page=page, minify=minify, options=options, + manual=manual) httpd = simple_server.make_server('0.0.0.0', port, app) - prints("Using the '%s' visualizer" % style, title="Serving on port %d..." % port) + prints("Using the '%s' visualizer" % style, + title="Serving on port %d..." 
% port) try: httpd.serve_forever() except KeyboardInterrupt: @@ -67,7 +69,8 @@ def serve(docs, style='dep', page=True, minify=False, options={}, manual=False, def app(environ, start_response): # headers and status need to be bytes in Python 2, see #1227 - headers = [(b_to_str(b'Content-type'), b_to_str(b'text/html; charset=utf-8'))] + headers = [(b_to_str(b'Content-type'), + b_to_str(b'text/html; charset=utf-8'))] start_response(b_to_str(b'200 OK'), headers) res = _html['parsed'].encode(encoding='utf-8') return [res] @@ -89,9 +92,9 @@ def parse_deps(orig_doc, options={}): end = word.i + 1 while end < len(doc) and doc[end].is_punct: end += 1 - span = doc[start : end] + span = doc[start:end] spans.append((span.start_char, span.end_char, word.tag_, - word.lemma_, word.ent_type_)) + word.lemma_, word.ent_type_)) for span_props in spans: doc.merge(*span_props) words = [{'text': w.text, 'tag': w.tag_} for w in doc] @@ -113,6 +116,7 @@ def parse_ents(doc, options={}): RETURNS (dict): Generated entities keyed by text (original text) and ents. """ ents = [{'start': ent.start_char, 'end': ent.end_char, 'label': ent.label_} - for ent in doc.ents] - title = doc.user_data.get('title', None) if hasattr(doc, 'user_data') else None + for ent in doc.ents] + title = (doc.user_data.get('title', None) + if hasattr(doc, 'user_data') else None) return {'text': doc.text, 'ents': ents, 'title': title} diff --git a/spacy/displacy/render.py b/spacy/displacy/render.py index 1050ffa87..4a494591c 100644 --- a/spacy/displacy/render.py +++ b/spacy/displacy/render.py @@ -14,13 +14,15 @@ class DependencyRenderer(object): """Initialise dependency renderer. options (dict): Visualiser-specific options (compact, word_spacing, - arrow_spacing, arrow_width, arrow_stroke, distance, - offset_x, color, bg, font) + arrow_spacing, arrow_width, arrow_stroke, distance, offset_x, + color, bg, font) """ self.compact = options.get('compact', False) self.word_spacing = options.get('word_spacing', 45) - self.arrow_spacing = options.get('arrow_spacing', 12 if self.compact else 20) - self.arrow_width = options.get('arrow_width', 6 if self.compact else 10) + self.arrow_spacing = options.get('arrow_spacing', + 12 if self.compact else 20) + self.arrow_width = options.get('arrow_width', + 6 if self.compact else 10) self.arrow_stroke = options.get('arrow_stroke', 2) self.distance = options.get('distance', 150 if self.compact else 175) self.offset_x = options.get('offset_x', 50) @@ -39,7 +41,8 @@ class DependencyRenderer(object): rendered = [self.render_svg(i, p['words'], p['arcs']) for i, p in enumerate(parsed)] if page: - content = ''.join([TPL_FIGURE.format(content=svg) for svg in rendered]) + content = ''.join([TPL_FIGURE.format(content=svg) + for svg in rendered]) markup = TPL_PAGE.format(content=content) else: markup = ''.join(rendered) @@ -63,12 +66,13 @@ class DependencyRenderer(object): self.id = render_id words = [self.render_word(w['text'], w['tag'], i) for i, w in enumerate(words)] - arcs = [self.render_arrow(a['label'], a['start'], a['end'], a['dir'], i) + arcs = [self.render_arrow(a['label'], a['start'], + a['end'], a['dir'], i) for i, a in enumerate(arcs)] content = ''.join(words) + ''.join(arcs) - return TPL_DEP_SVG.format(id=self.id, width=self.width, height=self.height, - color=self.color, bg=self.bg, font=self.font, - content=content) + return TPL_DEP_SVG.format(id=self.id, width=self.width, + height=self.height, color=self.color, + bg=self.bg, font=self.font, content=content) def render_word(self, text, tag, i): """Render 
individual word. @@ -96,7 +100,7 @@ class DependencyRenderer(object): x_start = self.offset_x+start*self.distance+self.arrow_spacing y = self.offset_y x_end = (self.offset_x+(end-start)*self.distance+start*self.distance - -self.arrow_spacing*(self.highest_level-level)/4) + - self.arrow_spacing*(self.highest_level-level)/4) y_curve = self.offset_y-level*self.distance/2 if self.compact: y_curve = self.offset_y-level*self.distance/6 @@ -133,8 +137,10 @@ class DependencyRenderer(object): if direction is 'left': pos1, pos2, pos3 = (x, x-self.arrow_width+2, x+self.arrow_width-2) else: - pos1, pos2, pos3 = (end, end+self.arrow_width-2, end-self.arrow_width+2) - arrowhead = (pos1, y+2, pos2, y-self.arrow_width, pos3, y-self.arrow_width) + pos1, pos2, pos3 = (end, end+self.arrow_width-2, + end-self.arrow_width+2) + arrowhead = (pos1, y+2, pos2, y-self.arrow_width, pos3, + y-self.arrow_width) return "M{},{} L{},{} {},{}".format(*arrowhead) def get_levels(self, arcs): @@ -159,9 +165,10 @@ class EntityRenderer(object): """ colors = {'ORG': '#7aecec', 'PRODUCT': '#bfeeb7', 'GPE': '#feca74', 'LOC': '#ff9561', 'PERSON': '#aa9cfc', 'NORP': '#c887fb', - 'FACILITY': '#9cc9cc', 'EVENT': '#ffeb80', 'LANGUAGE': '#ff8197', - 'WORK_OF_ART': '#f0d0ff', 'DATE': '#bfe1d9', 'TIME': '#bfe1d9', - 'MONEY': '#e4e7d2', 'QUANTITY': '#e4e7d2', 'ORDINAL': '#e4e7d2', + 'FACILITY': '#9cc9cc', 'EVENT': '#ffeb80', 'LAW': '#ff8197', + 'LANGUAGE': '#ff8197', 'WORK_OF_ART': '#f0d0ff', + 'DATE': '#bfe1d9', 'TIME': '#bfe1d9', 'MONEY': '#e4e7d2', + 'QUANTITY': '#e4e7d2', 'ORDINAL': '#e4e7d2', 'CARDINAL': '#e4e7d2', 'PERCENT': '#e4e7d2'} colors.update(options.get('colors', {})) self.default_color = '#ddd' @@ -176,9 +183,11 @@ class EntityRenderer(object): minify (bool): Minify HTML markup. RETURNS (unicode): Rendered HTML markup. """ - rendered = [self.render_ents(p['text'], p['ents'], p.get('title', None)) for p in parsed] + rendered = [self.render_ents(p['text'], p['ents'], + p.get('title', None)) for p in parsed] if page: - docs = ''.join([TPL_FIGURE.format(content=doc) for doc in rendered]) + docs = ''.join([TPL_FIGURE.format(content=doc) + for doc in rendered]) markup = TPL_PAGE.format(content=docs) else: markup = ''.join(rendered) diff --git a/spacy/glossary.py b/spacy/glossary.py index fd74d85e7..78e61f8a7 100644 --- a/spacy/glossary.py +++ b/spacy/glossary.py @@ -264,7 +264,6 @@ GLOSSARY = { 'nk': 'noun kernel element', 'nmc': 'numerical component', 'oa': 'accusative object', - 'oa': 'second accusative object', 'oc': 'clausal object', 'og': 'genitive object', 'op': 'prepositional object', diff --git a/spacy/gold.pyx b/spacy/gold.pyx index 5729af667..5adef7bf7 100644 --- a/spacy/gold.pyx +++ b/spacy/gold.pyx @@ -2,7 +2,6 @@ # coding: utf8 from __future__ import unicode_literals, print_function -import io import re import ujson import random @@ -10,9 +9,8 @@ import cytoolz import itertools from .syntax import nonproj -from .util import ensure_path -from . import util from .tokens import Doc +from . 
import util def tags_to_entities(tags): @@ -54,7 +52,8 @@ def merge_sents(sents): m_deps[3].extend(head + i for head in heads) m_deps[4].extend(labels) m_deps[5].extend(ner) - m_brackets.extend((b['first'] + i, b['last'] + i, b['label']) for b in brackets) + m_brackets.extend((b['first'] + i, b['last'] + i, b['label']) + for b in brackets) i += len(ids) return [(m_deps, m_brackets)] @@ -80,6 +79,8 @@ def align(cand_words, gold_words): punct_re = re.compile(r'\W') + + def _min_edit_path(cand_words, gold_words): cdef: Pool mem @@ -98,9 +99,9 @@ def _min_edit_path(cand_words, gold_words): mem = Pool() n_cand = len(cand_words) n_gold = len(gold_words) - # Levenshtein distance, except we need the history, and we may want different - # costs. - # Mark operations with a string, and score the history using _edit_cost. + # Levenshtein distance, except we need the history, and we may want + # different costs. Mark operations with a string, and score the history + # using _edit_cost. previous_row = [] prev_costs = mem.alloc(n_gold + 1, sizeof(int)) curr_costs = mem.alloc(n_gold + 1, sizeof(int)) @@ -144,9 +145,9 @@ def _min_edit_path(cand_words, gold_words): def minibatch(items, size=8): - '''Iterate over batches of items. `size` may be an iterator, + """Iterate over batches of items. `size` may be an iterator, so that batch-size can vary on each step. - ''' + """ if isinstance(size, int): size_ = itertools.repeat(8) else: @@ -168,6 +169,7 @@ class GoldCorpus(object): train_path (unicode or Path): File or directory of training data. dev_path (unicode or Path): File or directory of development data. + RETURNS (GoldCorpus): The newly created object. """ self.train_path = util.ensure_path(train_path) self.dev_path = util.ensure_path(dev_path) @@ -213,7 +215,7 @@ class GoldCorpus(object): train_tuples = self.train_tuples if projectivize: train_tuples = nonproj.preprocess_training_data( - self.train_tuples, label_freq_cutoff=100) + self.train_tuples, label_freq_cutoff=100) random.shuffle(train_tuples) gold_docs = self.iter_gold_docs(nlp, train_tuples, gold_preproc, max_length=max_length, @@ -222,7 +224,6 @@ class GoldCorpus(object): def dev_docs(self, nlp, gold_preproc=False): gold_docs = self.iter_gold_docs(nlp, self.dev_tuples, gold_preproc) - #gold_docs = nlp.preprocess_gold(gold_docs) yield from gold_docs @classmethod @@ -233,7 +234,6 @@ class GoldCorpus(object): raw_text = None else: paragraph_tuples = merge_sents(paragraph_tuples) - docs = cls._make_docs(nlp, raw_text, paragraph_tuples, gold_preproc, noise_level=noise_level) golds = cls._make_golds(docs, paragraph_tuples) @@ -248,17 +248,20 @@ class GoldCorpus(object): raw_text = add_noise(raw_text, noise_level) return [nlp.make_doc(raw_text)] else: - return [Doc(nlp.vocab, words=add_noise(sent_tuples[1], noise_level)) - for (sent_tuples, brackets) in paragraph_tuples] + return [Doc(nlp.vocab, + words=add_noise(sent_tuples[1], noise_level)) + for (sent_tuples, brackets) in paragraph_tuples] @classmethod def _make_golds(cls, docs, paragraph_tuples): assert len(docs) == len(paragraph_tuples) if len(docs) == 1: - return [GoldParse.from_annot_tuples(docs[0], paragraph_tuples[0][0])] + return [GoldParse.from_annot_tuples(docs[0], + paragraph_tuples[0][0])] else: return [GoldParse.from_annot_tuples(doc, sent_tuples) - for doc, (sent_tuples, brackets) in zip(docs, paragraph_tuples)] + for doc, (sent_tuples, brackets) + in zip(docs, paragraph_tuples)] @staticmethod def walk_corpus(path): @@ -305,7 +308,7 @@ def _corrupt(c, noise_level): def 
read_json_file(loc, docs_filter=None, limit=None): - loc = ensure_path(loc) + loc = util.ensure_path(loc) if loc.is_dir(): for filename in loc.iterdir(): yield from read_json_file(loc / filename, limit=limit) @@ -330,16 +333,16 @@ def read_json_file(loc, docs_filter=None, limit=None): for i, token in enumerate(sent['tokens']): words.append(token['orth']) ids.append(i) - tags.append(token.get('tag','-')) - heads.append(token.get('head',0) + i) - labels.append(token.get('dep','')) + tags.append(token.get('tag', '-')) + heads.append(token.get('head', 0) + i) + labels.append(token.get('dep', '')) # Ensure ROOT label is case-insensitive if labels[-1].lower() == 'root': labels[-1] = 'ROOT' ner.append(token.get('ner', '-')) sents.append([ [ids, words, tags, heads, labels, ner], - sent.get('brackets', [])]) + sent.get('brackets', [])]) if sents: yield [paragraph.get('raw', None), sents] @@ -382,19 +385,21 @@ cdef class GoldParse: @classmethod def from_annot_tuples(cls, doc, annot_tuples, make_projective=False): _, words, tags, heads, deps, entities = annot_tuples - return cls(doc, words=words, tags=tags, heads=heads, deps=deps, entities=entities, - make_projective=make_projective) + return cls(doc, words=words, tags=tags, heads=heads, deps=deps, + entities=entities, make_projective=make_projective) - def __init__(self, doc, annot_tuples=None, words=None, tags=None, heads=None, - deps=None, entities=None, make_projective=False, + def __init__(self, doc, annot_tuples=None, words=None, tags=None, + heads=None, deps=None, entities=None, make_projective=False, cats=None): """Create a GoldParse. doc (Doc): The document the annotations refer to. words (iterable): A sequence of unicode word strings. tags (iterable): A sequence of strings, representing tag annotations. - heads (iterable): A sequence of integers, representing syntactic head offsets. - deps (iterable): A sequence of strings, representing the syntactic relation types. + heads (iterable): A sequence of integers, representing syntactic + head offsets. + deps (iterable): A sequence of strings, representing the syntactic + relation types. entities (iterable): A sequence of named entity annotations, either as BILUO tag strings, or as `(start_char, end_char, label)` tuples, representing the entity positions. @@ -404,9 +409,10 @@ cdef class GoldParse: document (usually a sentence). Unlike entity annotations, label annotations can overlap, i.e. a single word can be covered by multiple labelled spans. The TextCategorizer component expects - true examples of a label to have the value 1.0, and negative examples - of a label to have the value 0.0. Labels not in the dictionary are - treated as missing -- the gradient for those labels will be zero. + true examples of a label to have the value 1.0, and negative + examples of a label to have the value 0.0. Labels not in the + dictionary are treated as missing - the gradient for those labels + will be zero. RETURNS (GoldParse): The newly constructed object. 
""" if words is None: @@ -470,11 +476,11 @@ cdef class GoldParse: self.ner[i] = entities[gold_i] cycle = nonproj.contains_cycle(self.heads) - if cycle != None: + if cycle is not None: raise Exception("Cycle found: %s" % cycle) if make_projective: - proj_heads,_ = nonproj.projectivize(self.heads, self.labels) + proj_heads, _ = nonproj.projectivize(self.heads, self.labels) self.heads = proj_heads def __len__(self): @@ -497,20 +503,19 @@ cdef class GoldParse: def biluo_tags_from_offsets(doc, entities, missing='O'): - """Encode labelled spans into per-token tags, using the Begin/In/Last/Unit/Out - scheme (BILUO). + """Encode labelled spans into per-token tags, using the + Begin/In/Last/Unit/Out scheme (BILUO). doc (Doc): The document that the entity offsets refer to. The output tags will refer to the token boundaries within the document. - entities (iterable): A sequence of `(start, end, label)` triples. `start` and - `end` should be character-offset integers denoting the slice into the - original string. - + entities (iterable): A sequence of `(start, end, label)` triples. `start` + and `end` should be character-offset integers denoting the slice into + the original string. RETURNS (list): A list of unicode strings, describing the tags. Each tag string will be of the form either "", "O" or "{action}-{label}", where action is one of "B", "I", "L", "U". The string "-" is used where the - entity offsets don't align with the tokenization in the `Doc` object. The - training algorithm will view these as missing values. "O" denotes a + entity offsets don't align with the tokenization in the `Doc` object. + The training algorithm will view these as missing values. "O" denotes a non-entity token. "B" denotes the beginning of a multi-token entity, "I" the inside of an entity of three or more tokens, and "L" the end of an entity of two or more tokens. "U" denotes a single-token entity. 
diff --git a/spacy/language.py b/spacy/language.py index 959fee916..05546cde4 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -1,31 +1,28 @@ # coding: utf8 from __future__ import absolute_import, unicode_literals -from contextlib import contextmanager -import copy -from thinc.neural import Model import random import ujson -from collections import OrderedDict import itertools import weakref import functools -import tqdm +from collections import OrderedDict +from contextlib import contextmanager +from copy import copy +from thinc.neural import Model +from thinc.neural.optimizers import Adam from .tokenizer import Tokenizer from .vocab import Vocab -from .tagger import Tagger from .lemmatizer import Lemmatizer - -from .pipeline import DependencyParser, Tensorizer, Tagger -from .pipeline import EntityRecognizer, SimilarityHook, TextCategorizer - -from .compat import Optimizer -from .compat import json_dumps, izip, copy_reg +from .pipeline import DependencyParser, Tensorizer, Tagger, EntityRecognizer +from .pipeline import SimilarityHook, TextCategorizer +from .compat import json_dumps, izip from .scorer import Scorer from ._ml import link_vectors_to_models from .attrs import IS_STOP -from .lang.punctuation import TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES, TOKENIZER_INFIXES +from .lang.punctuation import TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES +from .lang.punctuation import TOKENIZER_INFIXES from .lang.tokenizer_exceptions import TOKEN_MATCH from .lang.tag_map import TAG_MAP from .lang.lex_attrs import LEX_ATTRS, is_stop @@ -57,16 +54,18 @@ class BaseDefaults(object): def create_tokenizer(cls, nlp=None): rules = cls.tokenizer_exceptions token_match = cls.token_match - prefix_search = util.compile_prefix_regex(cls.prefixes).search \ - if cls.prefixes else None - suffix_search = util.compile_suffix_regex(cls.suffixes).search \ - if cls.suffixes else None - infix_finditer = util.compile_infix_regex(cls.infixes).finditer \ - if cls.infixes else None + prefix_search = (util.compile_prefix_regex(cls.prefixes).search + if cls.prefixes else None) + suffix_search = (util.compile_suffix_regex(cls.suffixes).search + if cls.suffixes else None) + infix_finditer = (util.compile_infix_regex(cls.infixes).finditer + if cls.infixes else None) vocab = nlp.vocab if nlp is not None else cls.create_vocab(nlp) return Tokenizer(vocab, rules=rules, - prefix_search=prefix_search, suffix_search=suffix_search, - infix_finditer=infix_finditer, token_match=token_match) + prefix_search=prefix_search, + suffix_search=suffix_search, + infix_finditer=infix_finditer, + token_match=token_match) pipe_names = ['tensorizer', 'tagger', 'parser', 'ner'] token_match = TOKEN_MATCH @@ -98,7 +97,7 @@ class Language(object): factories = { 'tokenizer': lambda nlp: nlp.Defaults.create_tokenizer(nlp), - 'tensorizer': lambda nlp, **cfg: TokenVectorEncoder(nlp.vocab, **cfg), + 'tensorizer': lambda nlp, **cfg: Tensorizer(nlp.vocab, **cfg), 'tagger': lambda nlp, **cfg: Tagger(nlp.vocab, **cfg), 'parser': lambda nlp, **cfg: DependencyParser(nlp.vocab, **cfg), 'ner': lambda nlp, **cfg: EntityRecognizer(nlp.vocab, **cfg), @@ -218,14 +217,14 @@ class Language(object): def add_pipe(self, component, name=None, before=None, after=None, first=None, last=None): """Add a component to the processing pipeline. Valid components are - callables that take a `Doc` object, modify it and return it. Only one of - before, after, first or last can be set. Default behaviour is "last". + callables that take a `Doc` object, modify it and return it. 
Only one + of before/after/first/last can be set. Default behaviour is "last". component (callable): The pipeline component. name (unicode): Name of pipeline component. Overwrites existing component.name attribute if available. If no name is set and the component exposes no name attribute, component.__name__ is - used. An error is raised if the name already exists in the pipeline. + used. An error is raised if a name already exists in the pipeline. before (unicode): Component name to insert component directly before. after (unicode): Component name to insert component directly after. first (bool): Insert component first / not first in the pipeline. @@ -240,7 +239,8 @@ class Language(object): name = component.name elif hasattr(component, '__name__'): name = component.__name__ - elif hasattr(component, '__class__') and hasattr(component.__class__, '__name__'): + elif (hasattr(component, '__class__') and + hasattr(component.__class__, '__name__')): name = component.__class__.__name__ else: name = repr(component) @@ -269,7 +269,7 @@ class Language(object): `name in nlp.pipe_names`. name (unicode): Name of the component. - RETURNS (bool): Whether a component of that name exists in the pipeline. + RETURNS (bool): Whether a component of the name exists in the pipeline. """ return name in self.pipe_names @@ -332,15 +332,12 @@ class Language(object): return doc def disable_pipes(self, *names): - '''Disable one or more pipeline components. - - If used as a context manager, the pipeline will be restored to the initial - state at the end of the block. Otherwise, a DisabledPipes object is - returned, that has a `.restore()` method you can use to undo your - changes. + """Disable one or more pipeline components. If used as a context + manager, the pipeline will be restored to the initial state at the end + of the block. Otherwise, a DisabledPipes object is returned, that has + a `.restore()` method you can use to undo your changes. EXAMPLE: - >>> nlp.add_pipe('parser') >>> nlp.add_pipe('tagger') >>> with nlp.disable_pipes('parser', 'tagger'): @@ -351,7 +348,7 @@ class Language(object): >>> assert not nlp.has_pipe('parser') >>> disabled.restore() >>> assert nlp.has_pipe('parser') - ''' + """ return DisabledPipes(self, *names) def make_doc(self, text): @@ -367,14 +364,14 @@ class Language(object): RETURNS (dict): Results from the update. EXAMPLE: - >>> with nlp.begin_training(gold, use_gpu=True) as (trainer, optimizer): + >>> with nlp.begin_training(gold) as (trainer, optimizer): >>> for epoch in trainer.epochs(gold): >>> for docs, golds in epoch: >>> state = nlp.update(docs, golds, sgd=optimizer) """ if len(docs) != len(golds): raise IndexError("Update expects same number of docs and golds " - "Got: %d, %d" % (len(docs), len(golds))) + "Got: %d, %d" % (len(docs), len(golds))) if len(docs) == 0: return if sgd is None: @@ -382,8 +379,10 @@ class Language(object): self._optimizer = Adam(Model.ops, 0.001) sgd = self._optimizer grads = {} + def get_grads(W, dW, key=None): grads[key] = (W, dW) + pipes = list(self.pipeline) random.shuffle(pipes) for name, proc in pipes: @@ -420,8 +419,8 @@ class Language(object): eps = util.env_opt('optimizer_eps', 1e-08) L2 = util.env_opt('L2_penalty', 1e-6) max_grad_norm = util.env_opt('grad_norm_clip', 1.) 
- self._optimizer = Optimizer(Model.ops, learn_rate, L2=L2, beta1=beta1, - beta2=beta2, eps=eps) + self._optimizer = Adam(Model.ops, learn_rate, L2=L2, beta1=beta1, + beta2=beta2, eps=eps) self._optimizer.max_grad_norm = max_grad_norm self._optimizer.device = device return self._optimizer @@ -460,8 +459,8 @@ class Language(object): eps = util.env_opt('optimizer_eps', 1e-08) L2 = util.env_opt('L2_penalty', 1e-6) max_grad_norm = util.env_opt('grad_norm_clip', 1.) - self._optimizer = Optimizer(Model.ops, learn_rate, L2=L2, beta1=beta1, - beta2=beta2, eps=eps) + self._optimizer = Adam(Model.ops, learn_rate, L2=L2, beta1=beta1, + beta2=beta2, eps=eps) self._optimizer.max_grad_norm = max_grad_norm self._optimizer.device = device return self._optimizer @@ -512,17 +511,17 @@ class Language(object): pass def pipe(self, texts, as_tuples=False, n_threads=2, batch_size=1000, - disable=[]): - """Process texts as a stream, and yield `Doc` objects in order. Supports - GIL-free multi-threading. + disable=[]): + """Process texts as a stream, and yield `Doc` objects in order. + Supports GIL-free multi-threading. texts (iterator): A sequence of texts to process. as_tuples (bool): If set to True, inputs should be a sequence of (text, context) tuples. Output will then be a sequence of (doc, context) tuples. Defaults to False. - n_threads (int): The number of worker threads to use. If -1, OpenMP will - decide how many to use at run time. Default is 2. + n_threads (int): The number of worker threads to use. If -1, OpenMP + will decide how many to use at run time. Default is 2. batch_size (int): The number of texts to buffer. disable (list): Names of the pipeline components to disable. YIELDS (Doc): Documents in the order of the original text. @@ -546,7 +545,8 @@ class Language(object): if name in disable: continue if hasattr(proc, 'pipe'): - docs = proc.pipe(docs, n_threads=n_threads, batch_size=batch_size) + docs = proc.pipe(docs, n_threads=n_threads, + batch_size=batch_size) else: # Apply the function, but yield the doc docs = _pipe(proc, docs) @@ -583,7 +583,7 @@ class Language(object): will include the model. path (unicode or Path): A path to a directory, which will be created if - it doesn't exist. Paths may be either strings or `Path`-like objects. + it doesn't exist. Paths may be strings or `Path`-like objects. disable (list): Names of pipeline components to disable and prevent from being saved. @@ -649,7 +649,7 @@ class Language(object): serializers = OrderedDict(( ('vocab', lambda: self.vocab.to_bytes()), ('tokenizer', lambda: self.tokenizer.to_bytes(vocab=False)), - ('meta', lambda: ujson.dumps(self.meta)) + ('meta', lambda: json_dumps(self.meta)) )) for i, (name, proc) in enumerate(self.pipeline): if name in disable: @@ -682,14 +682,14 @@ class Language(object): class DisabledPipes(list): - '''Manager for temporary pipeline disabling.''' + """Manager for temporary pipeline disabling.""" def __init__(self, nlp, *names): self.nlp = nlp self.names = names # Important! Not deep copy -- we just want the container (but we also # want to support people providing arbitrarily typed nlp.pipeline # objects.) 
- self.original_pipeline = copy.copy(nlp.pipeline) + self.original_pipeline = copy(nlp.pipeline) list.__init__(self) self.extend(nlp.remove_pipe(name) for name in names) @@ -702,7 +702,8 @@ class DisabledPipes(list): def restore(self): '''Restore the pipeline to its state when DisabledPipes was created.''' current, self.nlp.pipeline = self.nlp.pipeline, self.original_pipeline - unexpected = [name for name, pipe in current if not self.nlp.has_pipe(name)] + unexpected = [name for name, pipe in current + if not self.nlp.has_pipe(name)] if unexpected: # Don't change the pipeline if we're raising an error. self.nlp.pipeline = current diff --git a/spacy/lemmatizer.py b/spacy/lemmatizer.py index f3327a1d7..40cd995e2 100644 --- a/spacy/lemmatizer.py +++ b/spacy/lemmatizer.py @@ -43,16 +43,15 @@ class Lemmatizer(object): morphology = {} if morphology is None else morphology others = [key for key in morphology if key not in (POS, 'Number', 'POS', 'VerbForm', 'Tense')] - true_morph_key = morphology.get('morph', 0) if univ_pos == 'noun' and morphology.get('Number') == 'sing': return True elif univ_pos == 'verb' and morphology.get('VerbForm') == 'inf': return True # This maps 'VBP' to base form -- probably just need 'IS_BASE' # morphology - elif univ_pos == 'verb' and (morphology.get('VerbForm') == 'fin' and \ - morphology.get('Tense') == 'pres' and \ - morphology.get('Number') is None and \ + elif univ_pos == 'verb' and (morphology.get('VerbForm') == 'fin' and + morphology.get('Tense') == 'pres' and + morphology.get('Number') is None and not others): return True elif univ_pos == 'adj' and morphology.get('Degree') == 'pos': @@ -89,9 +88,6 @@ class Lemmatizer(object): def lemmatize(string, index, exceptions, rules): string = string.lower() forms = [] - # TODO: Is this correct? See discussion in Issue #435. - #if string in index: - # forms.append(string) forms.extend(exceptions.get(string, [])) oov_forms = [] if not forms: diff --git a/spacy/lexeme.pyx b/spacy/lexeme.pyx index f0f5c6398..88748af33 100644 --- a/spacy/lexeme.pyx +++ b/spacy/lexeme.pyx @@ -2,27 +2,17 @@ # coding: utf8 from __future__ import unicode_literals, print_function -from libc.math cimport sqrt -from cpython.ref cimport Py_INCREF -from cymem.cymem cimport Pool -from murmurhash.mrmr cimport hash64 - # Compiler crashes on memory view coercion without this. Should report bug. from cython.view cimport array as cvarray cimport numpy as np np.import_array() - from libc.string cimport memset import numpy from .typedefs cimport attr_t, flags_t from .attrs cimport IS_ALPHA, IS_ASCII, IS_DIGIT, IS_LOWER, IS_PUNCT, IS_SPACE from .attrs cimport IS_TITLE, IS_UPPER, LIKE_URL, LIKE_NUM, LIKE_EMAIL, IS_STOP -from .attrs cimport IS_BRACKET -from .attrs cimport IS_QUOTE -from .attrs cimport IS_LEFT_PUNCT -from .attrs cimport IS_RIGHT_PUNCT -from .attrs cimport IS_OOV +from .attrs cimport IS_BRACKET, IS_QUOTE, IS_LEFT_PUNCT, IS_RIGHT_PUNCT, IS_OOV from . import about @@ -32,8 +22,8 @@ memset(&EMPTY_LEXEME, 0, sizeof(LexemeC)) cdef class Lexeme: """An entry in the vocabulary. A `Lexeme` has no string context – it's a word-type, as opposed to a word token. It therefore has no part-of-speech - tag, dependency parse, or lemma (lemmatization depends on the part-of-speech - tag). + tag, dependency parse, or lemma (lemmatization depends on the + part-of-speech tag). """ def __init__(self, Vocab vocab, attr_t orth): """Create a Lexeme object. 
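For readers skimming the long block of property docstrings that follows, here is a small sketch of the lexeme attributes involved; the blank English vocab and the example word are assumptions picked to make the assertions self-evident.

    from spacy.lang.en import English

    nlp = English()
    apples = nlp.vocab[u'apples']     # looking up a string returns a Lexeme
    assert apples.is_alpha and apples.is_lower
    assert not apples.like_num and not apples.is_punct
    assert apples.prefix_ == u'a'     # length-1 substring from the start
    assert apples.suffix_ == u'les'   # length-3 substring from the end
    assert apples.shape_ == u'xxxx'   # orthographic shape, runs capped at 4

The flag values (is_alpha, is_lower, like_num and so on) come from the language's lexical attribute functions, so they are available without loading a statistical model.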
@@ -60,17 +50,17 @@ cdef class Lexeme: else: a = 0 b = 1 - if op == 2: # == + if op == 2: # == return a == b - elif op == 3: # != + elif op == 3: # != return a != b - elif op == 0: # < + elif op == 0: # < return a < b - elif op == 1: # <= + elif op == 1: # <= return a <= b - elif op == 4: # > + elif op == 4: # > return a > b - elif op == 5: # >= + elif op == 5: # >= return a >= b else: raise NotImplementedError(op) @@ -104,7 +94,8 @@ cdef class Lexeme: """ if self.vector_norm == 0 or other.vector_norm == 0: return 0.0 - return numpy.dot(self.vector, other.vector) / (self.vector_norm * other.vector_norm) + return (numpy.dot(self.vector, other.vector) / + (self.vector_norm * other.vector_norm)) def to_bytes(self): lex_data = Lexeme.c_to_bytes(self.c) @@ -130,19 +121,13 @@ cdef class Lexeme: self.orth = self.c.orth property has_vector: - """A boolean value indicating whether a word vector is associated with - the object. - - RETURNS (bool): Whether a word vector is associated with the object. + """RETURNS (bool): Whether a word vector is associated with the object. """ def __get__(self): return self.vocab.has_vector(self.c.orth) property vector_norm: - """The L2 norm of the lexeme's vector representation. - - RETURNS (float): The L2 norm of the vector representation. - """ + """RETURNS (float): The L2 norm of the vector representation.""" def __get__(self): vector = self.vector return numpy.sqrt((vector**2).sum()) @@ -169,149 +154,320 @@ cdef class Lexeme: self.vocab.set_vector(self.c.orth, vector) property rank: + """RETURNS (unicode): Sequential ID of the lexemes's lexical type, used + to index into tables, e.g. for word vectors.""" def __get__(self): return self.c.id + def __set__(self, value): self.c.id = value property sentiment: + """RETURNS (float): A scalar value indicating the positivity or + negativity of the lexeme.""" def __get__(self): return self.c.sentiment + def __set__(self, float sentiment): self.c.sentiment = sentiment property orth_: + """RETURNS (unicode): The original verbatim text of the lexeme + (identical to `Lexeme.text`). Exists mostly for consistency with + the other attributes.""" def __get__(self): return self.vocab.strings[self.c.orth] property text: - """A unicode representation of the token text. - - RETURNS (unicode): The original verbatim text of the token. - """ + """RETURNS (unicode): The original verbatim text of the lexeme.""" def __get__(self): return self.orth_ property lower: - def __get__(self): return self.c.lower - def __set__(self, attr_t x): self.c.lower = x + """RETURNS (unicode): Lowercase form of the lexeme.""" + def __get__(self): + return self.c.lower + + def __set__(self, attr_t x): + self.c.lower = x property norm: - def __get__(self): return self.c.norm - def __set__(self, attr_t x): self.c.norm = x + """RETURNS (uint64): The lexemes's norm, i.e. a normalised form of the + lexeme text. + """ + def __get__(self): + return self.c.norm + + def __set__(self, attr_t x): + self.c.norm = x property shape: - def __get__(self): return self.c.shape - def __set__(self, attr_t x): self.c.shape = x + """RETURNS (uint64): Transform of the word's string, to show + orthographic features. + """ + def __get__(self): + return self.c.shape + + def __set__(self, attr_t x): + self.c.shape = x property prefix: - def __get__(self): return self.c.prefix - def __set__(self, attr_t x): self.c.prefix = x + """RETURNS (uint64): Length-N substring from the start of the word. + Defaults to `N=1`. 
+ """ + def __get__(self): + return self.c.prefix + + def __set__(self, attr_t x): + self.c.prefix = x property suffix: - def __get__(self): return self.c.suffix - def __set__(self, attr_t x): self.c.suffix = x + """RETURNS (uint64): Length-N substring from the end of the word. + Defaults to `N=3`. + """ + def __get__(self): + return self.c.suffix + + def __set__(self, attr_t x): + self.c.suffix = x property cluster: - def __get__(self): return self.c.cluster - def __set__(self, attr_t x): self.c.cluster = x + """RETURNS (int): Brown cluster ID.""" + def __get__(self): + return self.c.cluster + + def __set__(self, attr_t x): + self.c.cluster = x property lang: - def __get__(self): return self.c.lang - def __set__(self, attr_t x): self.c.lang = x + """RETURNS (uint64): Language of the parent vocabulary.""" + def __get__(self): + return self.c.lang + + def __set__(self, attr_t x): + self.c.lang = x property prob: - def __get__(self): return self.c.prob - def __set__(self, float x): self.c.prob = x + """RETURNS (float): Smoothed log probability estimate of the lexeme's + type.""" + def __get__(self): + return self.c.prob + + def __set__(self, float x): + self.c.prob = x property lower_: - def __get__(self): return self.vocab.strings[self.c.lower] - def __set__(self, unicode x): self.c.lower = self.vocab.strings.add(x) + """RETURNS (unicode): Lowercase form of the word.""" + def __get__(self): + return self.vocab.strings[self.c.lower] + + def __set__(self, unicode x): + self.c.lower = self.vocab.strings.add(x) property norm_: - def __get__(self): return self.vocab.strings[self.c.norm] - def __set__(self, unicode x): self.c.norm = self.vocab.strings.add(x) + """RETURNS (unicode): The lexemes's norm, i.e. a normalised form of the + lexeme text. + """ + def __get__(self): + return self.vocab.strings[self.c.norm] + + def __set__(self, unicode x): + self.c.norm = self.vocab.strings.add(x) property shape_: - def __get__(self): return self.vocab.strings[self.c.shape] - def __set__(self, unicode x): self.c.shape = self.vocab.strings.add(x) + """RETURNS (unicode): Transform of the word's string, to show + orthographic features. + """ + def __get__(self): + return self.vocab.strings[self.c.shape] + + def __set__(self, unicode x): + self.c.shape = self.vocab.strings.add(x) property prefix_: - def __get__(self): return self.vocab.strings[self.c.prefix] - def __set__(self, unicode x): self.c.prefix = self.vocab.strings.add(x) + """RETURNS (unicode): Length-N substring from the start of the word. + Defaults to `N=1`. + """ + def __get__(self): + return self.vocab.strings[self.c.prefix] + + def __set__(self, unicode x): + self.c.prefix = self.vocab.strings.add(x) property suffix_: - def __get__(self): return self.vocab.strings[self.c.suffix] - def __set__(self, unicode x): self.c.suffix = self.vocab.strings.add(x) + """RETURNS (unicode): Length-N substring from the end of the word. + Defaults to `N=3`. 
+ """ + def __get__(self): + return self.vocab.strings[self.c.suffix] + + def __set__(self, unicode x): + self.c.suffix = self.vocab.strings.add(x) property lang_: - def __get__(self): return self.vocab.strings[self.c.lang] - def __set__(self, unicode x): self.c.lang = self.vocab.strings.add(x) + """RETURNS (unicode): Language of the parent vocabulary.""" + def __get__(self): + return self.vocab.strings[self.c.lang] + + def __set__(self, unicode x): + self.c.lang = self.vocab.strings.add(x) property flags: - def __get__(self): return self.c.flags - def __set__(self, flags_t x): self.c.flags = x + """RETURNS (uint64): Container of the lexeme's binary flags.""" + def __get__(self): + return self.c.flags + + def __set__(self, flags_t x): + self.c.flags = x property is_oov: - def __get__(self): return Lexeme.c_check_flag(self.c, IS_OOV) - def __set__(self, attr_t x): Lexeme.c_set_flag(self.c, IS_OOV, x) + """RETURNS (bool): Whether the lexeme is out-of-vocabulary.""" + def __get__(self): + return Lexeme.c_check_flag(self.c, IS_OOV) + + def __set__(self, attr_t x): + Lexeme.c_set_flag(self.c, IS_OOV, x) property is_stop: - def __get__(self): return Lexeme.c_check_flag(self.c, IS_STOP) - def __set__(self, bint x): Lexeme.c_set_flag(self.c, IS_STOP, x) + """RETURNS (bool): Whether the lexeme is a stop word.""" + def __get__(self): + return Lexeme.c_check_flag(self.c, IS_STOP) + + def __set__(self, bint x): + Lexeme.c_set_flag(self.c, IS_STOP, x) property is_alpha: - def __get__(self): return Lexeme.c_check_flag(self.c, IS_ALPHA) - def __set__(self, bint x): Lexeme.c_set_flag(self.c, IS_ALPHA, x) + """RETURNS (bool): Whether the lexeme consists of alphanumeric + characters. Equivalent to `lexeme.text.isalpha()`. + """ + def __get__(self): + return Lexeme.c_check_flag(self.c, IS_ALPHA) + + def __set__(self, bint x): + Lexeme.c_set_flag(self.c, IS_ALPHA, x) property is_ascii: - def __get__(self): return Lexeme.c_check_flag(self.c, IS_ASCII) - def __set__(self, bint x): Lexeme.c_set_flag(self.c, IS_ASCII, x) + """RETURNS (bool): Whether the lexeme consists of ASCII characters. + Equivalent to `[any(ord(c) >= 128 for c in lexeme.text)]`. + """ + def __get__(self): + return Lexeme.c_check_flag(self.c, IS_ASCII) + + def __set__(self, bint x): + Lexeme.c_set_flag(self.c, IS_ASCII, x) property is_digit: - def __get__(self): return Lexeme.c_check_flag(self.c, IS_DIGIT) - def __set__(self, bint x): Lexeme.c_set_flag(self.c, IS_DIGIT, x) + """RETURNS (bool): Whether the lexeme consists of digits. Equivalent + to `lexeme.text.isdigit()`. + """ + def __get__(self): + return Lexeme.c_check_flag(self.c, IS_DIGIT) + + def __set__(self, bint x): + Lexeme.c_set_flag(self.c, IS_DIGIT, x) property is_lower: - def __get__(self): return Lexeme.c_check_flag(self.c, IS_LOWER) - def __set__(self, bint x): Lexeme.c_set_flag(self.c, IS_LOWER, x) + """RETURNS (bool): Whether the lexeme is in lowercase. Equivalent to + `lexeme.text.islower()`. + """ + def __get__(self): + return Lexeme.c_check_flag(self.c, IS_LOWER) + + def __set__(self, bint x): + Lexeme.c_set_flag(self.c, IS_LOWER, x) + + property is_upper: + """RETURNS (bool): Whether the lexeme is in uppercase. Equivalent to + `lexeme.text.isupper()`. 
+ """ + def __get__(self): + return Lexeme.c_check_flag(self.c, IS_UPPER) + + def __set__(self, bint x): + Lexeme.c_set_flag(self.c, IS_UPPER, x) property is_title: - def __get__(self): return Lexeme.c_check_flag(self.c, IS_TITLE) - def __set__(self, bint x): Lexeme.c_set_flag(self.c, IS_TITLE, x) + """RETURNS (bool): Whether the lexeme is in titlecase. Equivalent to + `lexeme.text.istitle()`. + """ + def __get__(self): + return Lexeme.c_check_flag(self.c, IS_TITLE) + + def __set__(self, bint x): + Lexeme.c_set_flag(self.c, IS_TITLE, x) property is_punct: - def __get__(self): return Lexeme.c_check_flag(self.c, IS_PUNCT) - def __set__(self, bint x): Lexeme.c_set_flag(self.c, IS_PUNCT, x) + """RETURNS (bool): Whether the lexeme is punctuation.""" + def __get__(self): + return Lexeme.c_check_flag(self.c, IS_PUNCT) + + def __set__(self, bint x): + Lexeme.c_set_flag(self.c, IS_PUNCT, x) property is_space: - def __get__(self): return Lexeme.c_check_flag(self.c, IS_SPACE) - def __set__(self, bint x): Lexeme.c_set_flag(self.c, IS_SPACE, x) + """RETURNS (bool): Whether the lexeme consist of whitespace characters. + Equivalent to `lexeme.text.isspace()`. + """ + def __get__(self): + return Lexeme.c_check_flag(self.c, IS_SPACE) + + def __set__(self, bint x): + Lexeme.c_set_flag(self.c, IS_SPACE, x) property is_bracket: - def __get__(self): return Lexeme.c_check_flag(self.c, IS_BRACKET) - def __set__(self, bint x): Lexeme.c_set_flag(self.c, IS_BRACKET, x) + """RETURNS (bool): Whether the lexeme is a bracket.""" + def __get__(self): + return Lexeme.c_check_flag(self.c, IS_BRACKET) + + def __set__(self, bint x): + Lexeme.c_set_flag(self.c, IS_BRACKET, x) property is_quote: - def __get__(self): return Lexeme.c_check_flag(self.c, IS_QUOTE) - def __set__(self, bint x): Lexeme.c_set_flag(self.c, IS_QUOTE, x) + """RETURNS (bool): Whether the lexeme is a quotation mark.""" + def __get__(self): + return Lexeme.c_check_flag(self.c, IS_QUOTE) + + def __set__(self, bint x): + Lexeme.c_set_flag(self.c, IS_QUOTE, x) property is_left_punct: - def __get__(self): return Lexeme.c_check_flag(self.c, IS_LEFT_PUNCT) - def __set__(self, bint x): Lexeme.c_set_flag(self.c, IS_LEFT_PUNCT, x) + """RETURNS (bool): Whether the lexeme is left punctuation, e.g. ).""" + def __get__(self): + return Lexeme.c_check_flag(self.c, IS_LEFT_PUNCT) + + def __set__(self, bint x): + Lexeme.c_set_flag(self.c, IS_LEFT_PUNCT, x) property is_right_punct: - def __get__(self): return Lexeme.c_check_flag(self.c, IS_RIGHT_PUNCT) - def __set__(self, bint x): Lexeme.c_set_flag(self.c, IS_RIGHT_PUNCT, x) + """RETURNS (bool): Whether the lexeme is right punctuation, e.g. ).""" + def __get__(self): + return Lexeme.c_check_flag(self.c, IS_RIGHT_PUNCT) + + def __set__(self, bint x): + Lexeme.c_set_flag(self.c, IS_RIGHT_PUNCT, x) property like_url: - def __get__(self): return Lexeme.c_check_flag(self.c, LIKE_URL) - def __set__(self, bint x): Lexeme.c_set_flag(self.c, LIKE_URL, x) + """RETURNS (bool): Whether the lexeme resembles a URL.""" + def __get__(self): + return Lexeme.c_check_flag(self.c, LIKE_URL) + + def __set__(self, bint x): + Lexeme.c_set_flag(self.c, LIKE_URL, x) property like_num: - def __get__(self): return Lexeme.c_check_flag(self.c, LIKE_NUM) - def __set__(self, bint x): Lexeme.c_set_flag(self.c, LIKE_NUM, x) + """RETURNS (bool): Whether the lexeme represents a number, e.g. "10.9", + "10", "ten", etc. 
+ """ + def __get__(self): + return Lexeme.c_check_flag(self.c, LIKE_NUM) + + def __set__(self, bint x): + Lexeme.c_set_flag(self.c, LIKE_NUM, x) property like_email: - def __get__(self): return Lexeme.c_check_flag(self.c, LIKE_EMAIL) - def __set__(self, bint x): Lexeme.c_set_flag(self.c, LIKE_EMAIL, x) + """RETURNS (bool): Whether the lexeme resembles an email address.""" + def __get__(self): + return Lexeme.c_check_flag(self.c, LIKE_EMAIL) + + def __set__(self, bint x): + Lexeme.c_set_flag(self.c, LIKE_EMAIL, x) diff --git a/spacy/matcher.pyx b/spacy/matcher.pyx index 401405c14..a6b02ba2c 100644 --- a/spacy/matcher.pyx +++ b/spacy/matcher.pyx @@ -4,12 +4,6 @@ from __future__ import unicode_literals import ujson - -from .typedefs cimport attr_t -from .typedefs cimport hash_t -from .attrs cimport attr_id_t -from .structs cimport TokenC - from cymem.cymem cimport Pool from preshed.maps cimport PreshMap from libcpp.vector cimport vector @@ -17,14 +11,15 @@ from libcpp.pair cimport pair from murmurhash.mrmr cimport hash64 from libc.stdint cimport int32_t -from .attrs cimport ID, NULL_ATTR, ENT_TYPE -from . import attrs -from .tokens.doc cimport get_token_attr -from .tokens.doc cimport Doc +from .typedefs cimport attr_t +from .typedefs cimport hash_t +from .structs cimport TokenC +from .tokens.doc cimport Doc, get_token_attr from .vocab cimport Vocab +from .attrs import IDS +from .attrs cimport attr_id_t, ID, NULL_ATTR from .attrs import FLAG61 as U_ENT - from .attrs import FLAG60 as B2_ENT from .attrs import FLAG59 as B3_ENT from .attrs import FLAG58 as B4_ENT @@ -34,7 +29,6 @@ from .attrs import FLAG55 as B7_ENT from .attrs import FLAG54 as B8_ENT from .attrs import FLAG53 as B9_ENT from .attrs import FLAG52 as B10_ENT - from .attrs import FLAG51 as I3_ENT from .attrs import FLAG50 as I4_ENT from .attrs import FLAG49 as I5_ENT @@ -43,7 +37,6 @@ from .attrs import FLAG47 as I7_ENT from .attrs import FLAG46 as I8_ENT from .attrs import FLAG45 as I9_ENT from .attrs import FLAG44 as I10_ENT - from .attrs import FLAG43 as L2_ENT from .attrs import FLAG42 as L3_ENT from .attrs import FLAG41 as L4_ENT @@ -153,7 +146,7 @@ cdef int get_action(const TokenPatternC* pattern, const TokenC* token) nogil: def _convert_strings(token_specs, string_store): # Support 'syntactic sugar' operator '+', as combination of ONE, ZERO_PLUS operators = {'!': (ZERO,), '*': (ZERO_PLUS,), '+': (ONE, ZERO_PLUS), - '?': (ZERO_ONE,), '1': (ONE,)} + '?': (ZERO_ONE,), '1': (ONE,)} tokens = [] op = ONE for spec in token_specs: @@ -168,10 +161,10 @@ def _convert_strings(token_specs, string_store): if value in operators: ops = operators[value] else: - raise KeyError( - "Unknown operator '%s'. Options: %s" % (value, ', '.join(operators.keys()))) + msg = "Unknown operator '%s'. Options: %s" + raise KeyError(msg % (value, ', '.join(operators.keys()))) if isinstance(attr, basestring): - attr = attrs.IDS.get(attr.upper()) + attr = IDS.get(attr.upper()) if isinstance(value, basestring): value = string_store.add(value) if isinstance(value, bool): @@ -186,7 +179,7 @@ def _convert_strings(token_specs, string_store): def merge_phrase(matcher, doc, i, matches): """Callback to merge a phrase on match.""" ent_id, label, start, end = matches[i] - span = doc[start : end] + span = doc[start:end] span.merge(ent_type=label, ent_id=ent_id) @@ -233,13 +226,13 @@ cdef class Matcher: return self._normalize_key(key) in self._patterns def add(self, key, on_match, *patterns): - """Add a match-rule to the matcher. 
A match-rule consists of: an ID key, - an on_match callback, and one or more patterns. + """Add a match-rule to the matcher. A match-rule consists of: an ID + key, an on_match callback, and one or more patterns. If the key exists, the patterns are appended to the previous ones, and - the previous on_match callback is replaced. The `on_match` callback will - receive the arguments `(matcher, doc, i, matches)`. You can also set - `on_match` to `None` to not perform any actions. + the previous on_match callback is replaced. The `on_match` callback + will receive the arguments `(matcher, doc, i, matches)`. You can also + set `on_match` to `None` to not perform any actions. A pattern consists of one or more `token_specs`, where a `token_spec` is a dictionary mapping attribute IDs to values, and optionally a @@ -253,8 +246,8 @@ cdef class Matcher: The + and * operators are usually interpretted "greedily", i.e. longer matches are returned where possible. However, if you specify two '+' and '*' patterns in a row and their matches overlap, the first - operator will behave non-greedily. This quirk in the semantics - makes the matcher more efficient, by avoiding the need for back-tracking. + operator will behave non-greedily. This quirk in the semantics makes + the matcher more efficient, by avoiding the need for back-tracking. key (unicode): The match ID. on_match (callable): Callback executed on match. @@ -268,7 +261,6 @@ cdef class Matcher: key = self._normalize_key(key) self._patterns.setdefault(key, []) self._callbacks[key] = on_match - for pattern in patterns: specs = _convert_strings(pattern, self.vocab.strings) self.patterns.push_back(init_pattern(self.mem, key, specs)) @@ -315,9 +307,9 @@ cdef class Matcher: """Match a stream of documents, yielding them in turn. docs (iterable): A stream of documents. - batch_size (int): The number of documents to accumulate into a working set. + batch_size (int): Number of documents to accumulate into a working set. n_threads (int): The number of threads with which to work on the buffer - in parallel, if the `Matcher` implementation supports multi-threading. + in parallel, if the implementation supports multi-threading. YIELDS (Doc): Documents, in order. """ for doc in docs: @@ -325,7 +317,7 @@ cdef class Matcher: yield doc def __call__(self, Doc doc): - """Find all token sequences matching the supplied patterns on the `Doc`. + """Find all token sequences matching the supplied pattern. doc (Doc): The document to match over. RETURNS (list): A list of `(key, start, end)` tuples, @@ -342,8 +334,8 @@ cdef class Matcher: for token_i in range(doc.length): token = &doc.c[token_i] q = 0 - # Go over the open matches, extending or finalizing if able. Otherwise, - # we over-write them (q doesn't advance) + # Go over the open matches, extending or finalizing if able. + # Otherwise, we over-write them (q doesn't advance) for state in partials: action = get_action(state.second, token) if action == PANIC: @@ -356,8 +348,8 @@ cdef class Matcher: if action == REPEAT: # Leave the state in the queue, and advance to next slot - # (i.e. we don't overwrite -- we want to greedily match more - # pattern. + # (i.e. we don't overwrite -- we want to greedily match + # more pattern. q += 1 elif action == REJECT: pass @@ -366,8 +358,8 @@ cdef class Matcher: partials[q].second += 1 q += 1 elif action in (ACCEPT, ACCEPT_PREV): - # TODO: What to do about patterns starting with ZERO? Need to - # adjust the start position. + # TODO: What to do about patterns starting with ZERO? 
Need + # to adjust the start position. start = state.first end = token_i+1 if action == ACCEPT else token_i ent_id = state.second[1].attrs[0].value @@ -388,8 +380,8 @@ cdef class Matcher: state.second = pattern partials.push_back(state) elif action == ADVANCE: - # TODO: What to do about patterns starting with ZERO? Need to - # adjust the start position. + # TODO: What to do about patterns starting with ZERO? Need + # to adjust the start position. state.first = token_i state.second = pattern + 1 partials.push_back(state) @@ -413,7 +405,6 @@ cdef class Matcher: on_match = self._callbacks.get(ent_id) if on_match is not None: on_match(self, doc, i, matches) - # TODO: only return (match_id, start, end) return matches def _normalize_key(self, key): @@ -441,7 +432,8 @@ def get_bilou(length): elif length == 8: return [B8_ENT, I8_ENT, I8_ENT, I8_ENT, I8_ENT, I8_ENT, I8_ENT, L8_ENT] elif length == 9: - return [B9_ENT, I9_ENT, I9_ENT, I9_ENT, I9_ENT, I9_ENT, I9_ENT, I9_ENT, L9_ENT] + return [B9_ENT, I9_ENT, I9_ENT, I9_ENT, I9_ENT, I9_ENT, I9_ENT, I9_ENT, + L9_ENT] elif length == 10: return [B10_ENT, I10_ENT, I10_ENT, I10_ENT, I10_ENT, I10_ENT, I10_ENT, I10_ENT, I10_ENT, L10_ENT] @@ -454,10 +446,8 @@ cdef class PhraseMatcher: cdef Vocab vocab cdef Matcher matcher cdef PreshMap phrase_ids - cdef int max_length cdef attr_t* _phrase_key - cdef public object _callbacks cdef public object _patterns @@ -470,7 +460,8 @@ cdef class PhraseMatcher: self.phrase_ids = PreshMap() abstract_patterns = [] for length in range(1, max_length): - abstract_patterns.append([{tag: True} for tag in get_bilou(length)]) + abstract_patterns.append([{tag: True} + for tag in get_bilou(length)]) self.matcher.add('Candidate', None, *abstract_patterns) self._callbacks = {} @@ -496,8 +487,8 @@ cdef class PhraseMatcher: return (self.__class__, (self.vocab,), None, None) def add(self, key, on_match, *docs): - """Add a match-rule to the matcher. A match-rule consists of: an ID key, - an on_match callback, and one or more patterns. + """Add a match-rule to the matcher. A match-rule consists of: an ID + key, an on_match callback, and one or more patterns. key (unicode): The match ID. on_match (callable): Callback executed on match. @@ -513,7 +504,6 @@ cdef class PhraseMatcher: raise ValueError(msg % (len(doc), self.max_length)) cdef hash_t ent_id = self.matcher._normalize_key(key) self._callbacks[ent_id] = on_match - cdef int length cdef int i cdef hash_t phrase_hash @@ -553,9 +543,9 @@ cdef class PhraseMatcher: """Match a stream of documents, yielding them in turn. docs (iterable): A stream of documents. - batch_size (int): The number of documents to accumulate into a working set. + batch_size (int): Number of documents to accumulate into a working set. n_threads (int): The number of threads with which to work on the buffer - in parallel, if the `Matcher` implementation supports multi-threading. + in parallel, if the implementation supports multi-threading. YIELDS (Doc): Documents, in order. 
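
As a concrete companion to the `Matcher.add` docstring above, here is a hedged usage sketch: one pattern with an optional token (`'OP': '?'`) and an `on_match` callback that receives `(matcher, doc, i, matches)`, exactly as described. The rule name and text are arbitrary, and it assumes an installed English model as in the earlier examples.

import spacy
from spacy.matcher import Matcher

nlp = spacy.load('en_core_web_sm')     # assumption: this model is installed
matcher = Matcher(nlp.vocab)

def on_match(matcher, doc, i, matches):
    match_id, start, end = matches[i]
    print('matched:', doc[start:end].text)

# "hello", optionally followed by punctuation, then "world"
pattern = [{'LOWER': 'hello'}, {'IS_PUNCT': True, 'OP': '?'}, {'LOWER': 'world'}]
matcher.add('HelloWorld', on_match, pattern)

doc = nlp(u'Hello, world! Hello world!')
matches = matcher(doc)    # [(match_id, start, end), ...]; on_match fires per match

`PhraseMatcher.add`, further down in this hunk, has the same `(key, on_match, *patterns)` shape, but takes `Doc` objects instead of token-spec lists.
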
""" for doc in stream: @@ -569,7 +559,8 @@ cdef class PhraseMatcher: self._phrase_key[i] = 0 for i, j in enumerate(range(start, end)): self._phrase_key[i] = doc.c[j].lex.orth - cdef hash_t key = hash64(self._phrase_key, self.max_length * sizeof(attr_t), 0) + cdef hash_t key = hash64(self._phrase_key, + self.max_length * sizeof(attr_t), 0) ent_id = self.phrase_ids.get(key) if ent_id == 0: return None diff --git a/spacy/morphology.pyx b/spacy/morphology.pyx index 91befaa1b..b3989839d 100644 --- a/spacy/morphology.pyx +++ b/spacy/morphology.pyx @@ -4,17 +4,15 @@ from __future__ import unicode_literals from libc.string cimport memset -from .parts_of_speech cimport ADJ, VERB, NOUN, PUNCT, SPACE from .attrs cimport POS, IS_SPACE +from .attrs import LEMMA, intify_attrs +from .parts_of_speech cimport SPACE from .parts_of_speech import IDS as POS_IDS from .lexeme cimport Lexeme -from .attrs import LEMMA, intify_attrs def _normalize_props(props): - """ - Transform deprecated string keys to correct names. - """ + """Transform deprecated string keys to correct names.""" out = {} for key, value in props.items(): if key == POS: @@ -77,7 +75,8 @@ cdef class Morphology: cdef int assign_untagged(self, TokenC* token) except -1: """Set morphological attributes on a token without a POS tag. Uses the lemmatizer's lookup() method, which looks up the string in the - table provided by the language data as lemma_lookup (if available).""" + table provided by the language data as lemma_lookup (if available). + """ if token.lemma == 0: orth_str = self.strings[token.lex.orth] lemma = self.lemmatizer.lookup(orth_str) @@ -95,11 +94,10 @@ cdef class Morphology: cdef int assign_tag_id(self, TokenC* token, int tag_id) except -1: if tag_id > self.n_tags: raise ValueError("Unknown tag ID: %s" % tag_id) - # TODO: It's pretty arbitrary to put this logic here. I guess the justification - # is that this is where the specific word and the tag interact. Still, - # we should have a better way to enforce this rule, or figure out why - # the statistical model fails. - # Related to Issue #220 + # TODO: It's pretty arbitrary to put this logic here. I guess the + # justification is that this is where the specific word and the tag + # interact. Still, we should have a better way to enforce this rule, or + # figure out why the statistical model fails. Related to Issue #220 if Lexeme.c_check_flag(token.lex, IS_SPACE): tag_id = self.reverse_index[self.strings.add('_SP')] rich_tag = self.rich_tags[tag_id] @@ -123,14 +121,13 @@ cdef class Morphology: else: flags[0] &= ~(one << flag_id) - def add_special_case(self, unicode tag_str, unicode orth_str, attrs, force=False): - """ - Add a special-case rule to the morphological analyser. Tokens whose + def add_special_case(self, unicode tag_str, unicode orth_str, attrs, + force=False): + """Add a special-case rule to the morphological analyser. Tokens whose tag and orth match the rule will receive the specified properties. - Arguments: - tag (unicode): The part-of-speech tag to key the exception. - orth (unicode): The word-form to key the exception. + tag (unicode): The part-of-speech tag to key the exception. + orth (unicode): The word-form to key the exception. """ self.exc[(tag_str, orth_str)] = dict(attrs) tag = self.strings.add(tag_str) @@ -144,10 +141,9 @@ cdef class Morphology: elif force: memset(cached, 0, sizeof(cached[0])) else: - msg = ("Conflicting morphology exception for (%s, %s). 
Use force=True " - "to overwrite.") - msg = msg % (tag_str, orth_str) - raise ValueError(msg) + raise ValueError( + "Conflicting morphology exception for (%s, %s). Use " + "force=True to overwrite." % (tag_str, orth_str)) cached.tag = rich_tag # TODO: Refactor this to take arbitrary attributes. @@ -218,7 +214,7 @@ IDS = { "Definite_two": Definite_two, "Definite_def": Definite_def, "Definite_red": Definite_red, - "Definite_cons": Definite_cons, # U20 + "Definite_cons": Definite_cons, # U20 "Definite_ind": Definite_ind, "Degree_cmp": Degree_cmp, "Degree_comp": Degree_comp, @@ -227,7 +223,7 @@ IDS = { "Degree_sup": Degree_sup, "Degree_abs": Degree_abs, "Degree_com": Degree_com, - "Degree_dim ": Degree_dim, # du + "Degree_dim ": Degree_dim, # du "Gender_com": Gender_com, "Gender_fem": Gender_fem, "Gender_masc": Gender_masc, @@ -242,15 +238,15 @@ IDS = { "Negative_neg": Negative_neg, "Negative_pos": Negative_pos, "Negative_yes": Negative_yes, - "Polarity_neg": Polarity_neg, # U20 - "Polarity_pos": Polarity_pos, # U20 + "Polarity_neg": Polarity_neg, # U20 + "Polarity_pos": Polarity_pos, # U20 "Number_com": Number_com, "Number_dual": Number_dual, "Number_none": Number_none, "Number_plur": Number_plur, "Number_sing": Number_sing, - "Number_ptan ": Number_ptan, # bg - "Number_count ": Number_count, # bg + "Number_ptan ": Number_ptan, # bg + "Number_count ": Number_count, # bg "NumType_card": NumType_card, "NumType_dist": NumType_dist, "NumType_frac": NumType_frac, @@ -276,7 +272,7 @@ IDS = { "PronType_rel": PronType_rel, "PronType_tot": PronType_tot, "PronType_clit": PronType_clit, - "PronType_exc ": PronType_exc, # es, ca, it, fa, + "PronType_exc ": PronType_exc, # es, ca, it, fa, "Reflex_yes": Reflex_yes, "Tense_fut": Tense_fut, "Tense_imp": Tense_imp, @@ -292,19 +288,19 @@ IDS = { "VerbForm_partPres": VerbForm_partPres, "VerbForm_sup": VerbForm_sup, "VerbForm_trans": VerbForm_trans, - "VerbForm_conv": VerbForm_conv, # U20 - "VerbForm_gdv ": VerbForm_gdv, # la, + "VerbForm_conv": VerbForm_conv, # U20 + "VerbForm_gdv ": VerbForm_gdv, # la, "Voice_act": Voice_act, "Voice_cau": Voice_cau, "Voice_pass": Voice_pass, - "Voice_mid ": Voice_mid, # gkc, - "Voice_int ": Voice_int, # hb, - "Abbr_yes ": Abbr_yes, # cz, fi, sl, U, - "AdpType_prep ": AdpType_prep, # cz, U, - "AdpType_post ": AdpType_post, # U, - "AdpType_voc ": AdpType_voc, # cz, - "AdpType_comprep ": AdpType_comprep, # cz, - "AdpType_circ ": AdpType_circ, # U, + "Voice_mid ": Voice_mid, # gkc, + "Voice_int ": Voice_int, # hb, + "Abbr_yes ": Abbr_yes, # cz, fi, sl, U, + "AdpType_prep ": AdpType_prep, # cz, U, + "AdpType_post ": AdpType_post, # U, + "AdpType_voc ": AdpType_voc, # cz, + "AdpType_comprep ": AdpType_comprep, # cz, + "AdpType_circ ": AdpType_circ, # U, "AdvType_man": AdvType_man, "AdvType_loc": AdvType_loc, "AdvType_tim": AdvType_tim, @@ -314,122 +310,122 @@ IDS = { "AdvType_sta": AdvType_sta, "AdvType_ex": AdvType_ex, "AdvType_adadj": AdvType_adadj, - "ConjType_oper ": ConjType_oper, # cz, U, - "ConjType_comp ": ConjType_comp, # cz, U, - "Connegative_yes ": Connegative_yes, # fi, - "Derivation_minen ": Derivation_minen, # fi, - "Derivation_sti ": Derivation_sti, # fi, - "Derivation_inen ": Derivation_inen, # fi, - "Derivation_lainen ": Derivation_lainen, # fi, - "Derivation_ja ": Derivation_ja, # fi, - "Derivation_ton ": Derivation_ton, # fi, - "Derivation_vs ": Derivation_vs, # fi, - "Derivation_ttain ": Derivation_ttain, # fi, - "Derivation_ttaa ": Derivation_ttaa, # fi, - "Echo_rdp ": Echo_rdp, # U, - "Echo_ech ": Echo_ech, # 
U, - "Foreign_foreign ": Foreign_foreign, # cz, fi, U, - "Foreign_fscript ": Foreign_fscript, # cz, fi, U, - "Foreign_tscript ": Foreign_tscript, # cz, U, - "Foreign_yes ": Foreign_yes, # sl, - "Gender_dat_masc ": Gender_dat_masc, # bq, U, - "Gender_dat_fem ": Gender_dat_fem, # bq, U, - "Gender_erg_masc ": Gender_erg_masc, # bq, - "Gender_erg_fem ": Gender_erg_fem, # bq, - "Gender_psor_masc ": Gender_psor_masc, # cz, sl, U, - "Gender_psor_fem ": Gender_psor_fem, # cz, sl, U, - "Gender_psor_neut ": Gender_psor_neut, # sl, - "Hyph_yes ": Hyph_yes, # cz, U, - "InfForm_one ": InfForm_one, # fi, - "InfForm_two ": InfForm_two, # fi, - "InfForm_three ": InfForm_three, # fi, - "NameType_geo ": NameType_geo, # U, cz, - "NameType_prs ": NameType_prs, # U, cz, - "NameType_giv ": NameType_giv, # U, cz, - "NameType_sur ": NameType_sur, # U, cz, - "NameType_nat ": NameType_nat, # U, cz, - "NameType_com ": NameType_com, # U, cz, - "NameType_pro ": NameType_pro, # U, cz, - "NameType_oth ": NameType_oth, # U, cz, - "NounType_com ": NounType_com, # U, - "NounType_prop ": NounType_prop, # U, - "NounType_class ": NounType_class, # U, - "Number_abs_sing ": Number_abs_sing, # bq, U, - "Number_abs_plur ": Number_abs_plur, # bq, U, - "Number_dat_sing ": Number_dat_sing, # bq, U, - "Number_dat_plur ": Number_dat_plur, # bq, U, - "Number_erg_sing ": Number_erg_sing, # bq, U, - "Number_erg_plur ": Number_erg_plur, # bq, U, - "Number_psee_sing ": Number_psee_sing, # U, - "Number_psee_plur ": Number_psee_plur, # U, - "Number_psor_sing ": Number_psor_sing, # cz, fi, sl, U, - "Number_psor_plur ": Number_psor_plur, # cz, fi, sl, U, - "NumForm_digit ": NumForm_digit, # cz, sl, U, - "NumForm_roman ": NumForm_roman, # cz, sl, U, - "NumForm_word ": NumForm_word, # cz, sl, U, - "NumValue_one ": NumValue_one, # cz, U, - "NumValue_two ": NumValue_two, # cz, U, - "NumValue_three ": NumValue_three, # cz, U, - "PartForm_pres ": PartForm_pres, # fi, - "PartForm_past ": PartForm_past, # fi, - "PartForm_agt ": PartForm_agt, # fi, - "PartForm_neg ": PartForm_neg, # fi, - "PartType_mod ": PartType_mod, # U, - "PartType_emp ": PartType_emp, # U, - "PartType_res ": PartType_res, # U, - "PartType_inf ": PartType_inf, # U, - "PartType_vbp ": PartType_vbp, # U, - "Person_abs_one ": Person_abs_one, # bq, U, - "Person_abs_two ": Person_abs_two, # bq, U, - "Person_abs_three ": Person_abs_three, # bq, U, - "Person_dat_one ": Person_dat_one, # bq, U, - "Person_dat_two ": Person_dat_two, # bq, U, - "Person_dat_three ": Person_dat_three, # bq, U, - "Person_erg_one ": Person_erg_one, # bq, U, - "Person_erg_two ": Person_erg_two, # bq, U, - "Person_erg_three ": Person_erg_three, # bq, U, - "Person_psor_one ": Person_psor_one, # fi, U, - "Person_psor_two ": Person_psor_two, # fi, U, - "Person_psor_three ": Person_psor_three, # fi, U, - "Polite_inf ": Polite_inf, # bq, U, - "Polite_pol ": Polite_pol, # bq, U, - "Polite_abs_inf ": Polite_abs_inf, # bq, U, - "Polite_abs_pol ": Polite_abs_pol, # bq, U, - "Polite_erg_inf ": Polite_erg_inf, # bq, U, - "Polite_erg_pol ": Polite_erg_pol, # bq, U, - "Polite_dat_inf ": Polite_dat_inf, # bq, U, - "Polite_dat_pol ": Polite_dat_pol, # bq, U, - "Prefix_yes ": Prefix_yes, # U, - "PrepCase_npr ": PrepCase_npr, # cz, - "PrepCase_pre ": PrepCase_pre, # U, - "PunctSide_ini ": PunctSide_ini, # U, - "PunctSide_fin ": PunctSide_fin, # U, - "PunctType_peri ": PunctType_peri, # U, - "PunctType_qest ": PunctType_qest, # U, - "PunctType_excl ": PunctType_excl, # U, - "PunctType_quot ": PunctType_quot, # U, - "PunctType_brck 
": PunctType_brck, # U, - "PunctType_comm ": PunctType_comm, # U, - "PunctType_colo ": PunctType_colo, # U, - "PunctType_semi ": PunctType_semi, # U, - "PunctType_dash ": PunctType_dash, # U, - "Style_arch ": Style_arch, # cz, fi, U, - "Style_rare ": Style_rare, # cz, fi, U, - "Style_poet ": Style_poet, # cz, U, - "Style_norm ": Style_norm, # cz, U, - "Style_coll ": Style_coll, # cz, U, - "Style_vrnc ": Style_vrnc, # cz, U, - "Style_sing ": Style_sing, # cz, U, - "Style_expr ": Style_expr, # cz, U, - "Style_derg ": Style_derg, # cz, U, - "Style_vulg ": Style_vulg, # cz, U, - "Style_yes ": Style_yes, # fi, U, - "StyleVariant_styleShort ": StyleVariant_styleShort, # cz, - "StyleVariant_styleBound ": StyleVariant_styleBound, # cz, sl, - "VerbType_aux ": VerbType_aux, # U, - "VerbType_cop ": VerbType_cop, # U, - "VerbType_mod ": VerbType_mod, # U, - "VerbType_light ": VerbType_light, # U, + "ConjType_oper ": ConjType_oper, # cz, U, + "ConjType_comp ": ConjType_comp, # cz, U, + "Connegative_yes ": Connegative_yes, # fi, + "Derivation_minen ": Derivation_minen, # fi, + "Derivation_sti ": Derivation_sti, # fi, + "Derivation_inen ": Derivation_inen, # fi, + "Derivation_lainen ": Derivation_lainen, # fi, + "Derivation_ja ": Derivation_ja, # fi, + "Derivation_ton ": Derivation_ton, # fi, + "Derivation_vs ": Derivation_vs, # fi, + "Derivation_ttain ": Derivation_ttain, # fi, + "Derivation_ttaa ": Derivation_ttaa, # fi, + "Echo_rdp ": Echo_rdp, # U, + "Echo_ech ": Echo_ech, # U, + "Foreign_foreign ": Foreign_foreign, # cz, fi, U, + "Foreign_fscript ": Foreign_fscript, # cz, fi, U, + "Foreign_tscript ": Foreign_tscript, # cz, U, + "Foreign_yes ": Foreign_yes, # sl, + "Gender_dat_masc ": Gender_dat_masc, # bq, U, + "Gender_dat_fem ": Gender_dat_fem, # bq, U, + "Gender_erg_masc ": Gender_erg_masc, # bq, + "Gender_erg_fem ": Gender_erg_fem, # bq, + "Gender_psor_masc ": Gender_psor_masc, # cz, sl, U, + "Gender_psor_fem ": Gender_psor_fem, # cz, sl, U, + "Gender_psor_neut ": Gender_psor_neut, # sl, + "Hyph_yes ": Hyph_yes, # cz, U, + "InfForm_one ": InfForm_one, # fi, + "InfForm_two ": InfForm_two, # fi, + "InfForm_three ": InfForm_three, # fi, + "NameType_geo ": NameType_geo, # U, cz, + "NameType_prs ": NameType_prs, # U, cz, + "NameType_giv ": NameType_giv, # U, cz, + "NameType_sur ": NameType_sur, # U, cz, + "NameType_nat ": NameType_nat, # U, cz, + "NameType_com ": NameType_com, # U, cz, + "NameType_pro ": NameType_pro, # U, cz, + "NameType_oth ": NameType_oth, # U, cz, + "NounType_com ": NounType_com, # U, + "NounType_prop ": NounType_prop, # U, + "NounType_class ": NounType_class, # U, + "Number_abs_sing ": Number_abs_sing, # bq, U, + "Number_abs_plur ": Number_abs_plur, # bq, U, + "Number_dat_sing ": Number_dat_sing, # bq, U, + "Number_dat_plur ": Number_dat_plur, # bq, U, + "Number_erg_sing ": Number_erg_sing, # bq, U, + "Number_erg_plur ": Number_erg_plur, # bq, U, + "Number_psee_sing ": Number_psee_sing, # U, + "Number_psee_plur ": Number_psee_plur, # U, + "Number_psor_sing ": Number_psor_sing, # cz, fi, sl, U, + "Number_psor_plur ": Number_psor_plur, # cz, fi, sl, U, + "NumForm_digit ": NumForm_digit, # cz, sl, U, + "NumForm_roman ": NumForm_roman, # cz, sl, U, + "NumForm_word ": NumForm_word, # cz, sl, U, + "NumValue_one ": NumValue_one, # cz, U, + "NumValue_two ": NumValue_two, # cz, U, + "NumValue_three ": NumValue_three, # cz, U, + "PartForm_pres ": PartForm_pres, # fi, + "PartForm_past ": PartForm_past, # fi, + "PartForm_agt ": PartForm_agt, # fi, + "PartForm_neg ": PartForm_neg, # fi, + 
"PartType_mod ": PartType_mod, # U, + "PartType_emp ": PartType_emp, # U, + "PartType_res ": PartType_res, # U, + "PartType_inf ": PartType_inf, # U, + "PartType_vbp ": PartType_vbp, # U, + "Person_abs_one ": Person_abs_one, # bq, U, + "Person_abs_two ": Person_abs_two, # bq, U, + "Person_abs_three ": Person_abs_three, # bq, U, + "Person_dat_one ": Person_dat_one, # bq, U, + "Person_dat_two ": Person_dat_two, # bq, U, + "Person_dat_three ": Person_dat_three, # bq, U, + "Person_erg_one ": Person_erg_one, # bq, U, + "Person_erg_two ": Person_erg_two, # bq, U, + "Person_erg_three ": Person_erg_three, # bq, U, + "Person_psor_one ": Person_psor_one, # fi, U, + "Person_psor_two ": Person_psor_two, # fi, U, + "Person_psor_three ": Person_psor_three, # fi, U, + "Polite_inf ": Polite_inf, # bq, U, + "Polite_pol ": Polite_pol, # bq, U, + "Polite_abs_inf ": Polite_abs_inf, # bq, U, + "Polite_abs_pol ": Polite_abs_pol, # bq, U, + "Polite_erg_inf ": Polite_erg_inf, # bq, U, + "Polite_erg_pol ": Polite_erg_pol, # bq, U, + "Polite_dat_inf ": Polite_dat_inf, # bq, U, + "Polite_dat_pol ": Polite_dat_pol, # bq, U, + "Prefix_yes ": Prefix_yes, # U, + "PrepCase_npr ": PrepCase_npr, # cz, + "PrepCase_pre ": PrepCase_pre, # U, + "PunctSide_ini ": PunctSide_ini, # U, + "PunctSide_fin ": PunctSide_fin, # U, + "PunctType_peri ": PunctType_peri, # U, + "PunctType_qest ": PunctType_qest, # U, + "PunctType_excl ": PunctType_excl, # U, + "PunctType_quot ": PunctType_quot, # U, + "PunctType_brck ": PunctType_brck, # U, + "PunctType_comm ": PunctType_comm, # U, + "PunctType_colo ": PunctType_colo, # U, + "PunctType_semi ": PunctType_semi, # U, + "PunctType_dash ": PunctType_dash, # U, + "Style_arch ": Style_arch, # cz, fi, U, + "Style_rare ": Style_rare, # cz, fi, U, + "Style_poet ": Style_poet, # cz, U, + "Style_norm ": Style_norm, # cz, U, + "Style_coll ": Style_coll, # cz, U, + "Style_vrnc ": Style_vrnc, # cz, U, + "Style_sing ": Style_sing, # cz, U, + "Style_expr ": Style_expr, # cz, U, + "Style_derg ": Style_derg, # cz, U, + "Style_vulg ": Style_vulg, # cz, U, + "Style_yes ": Style_yes, # fi, U, + "StyleVariant_styleShort ": StyleVariant_styleShort, # cz, + "StyleVariant_styleBound ": StyleVariant_styleBound, # cz, sl, + "VerbType_aux ": VerbType_aux, # U, + "VerbType_cop ": VerbType_cop, # U, + "VerbType_mod ": VerbType_mod, # U, + "VerbType_light ": VerbType_light, # U, } diff --git a/spacy/parts_of_speech.pyx b/spacy/parts_of_speech.pyx index 38d5959b6..3925a6738 100644 --- a/spacy/parts_of_speech.pyx +++ b/spacy/parts_of_speech.pyx @@ -8,7 +8,7 @@ IDS = { "ADP": ADP, "ADV": ADV, "AUX": AUX, - "CONJ": CONJ, # U20 + "CONJ": CONJ, # U20 "CCONJ": CCONJ, "DET": DET, "INTJ": INTJ, diff --git a/spacy/pipeline.pyx b/spacy/pipeline.pyx index 089fef4e8..842e27069 100644 --- a/spacy/pipeline.pyx +++ b/spacy/pipeline.pyx @@ -3,26 +3,17 @@ # coding: utf8 from __future__ import unicode_literals -from thinc.api import chain, layerize, with_getitem import numpy cimport numpy as np import cytoolz -import util from collections import OrderedDict import ujson import msgpack -from thinc.api import add, layerize, chain, clone, concatenate, with_flatten -from thinc.v2v import Model, Maxout, Softmax, Affine, ReLu, SELU -from thinc.i2v import HashEmbed -from thinc.t2v import Pooling, max_pool, mean_pool, sum_pool -from thinc.t2t import ExtractWindow, ParametricAttention -from thinc.misc import Residual -from thinc.misc import BatchNorm as BN -from thinc.misc import LayerNorm as LN - +from thinc.api import chain +from thinc.v2v import 
Softmax +from thinc.t2v import Pooling, max_pool, mean_pool from thinc.neural.util import to_categorical - from thinc.neural._classes.difference import Siamese, CauchySimilarity from .tokens.doc cimport Doc @@ -30,29 +21,23 @@ from .syntax.nn_parser cimport Parser from .syntax import nonproj from .syntax.ner cimport BiluoPushDown from .syntax.arc_eager cimport ArcEager -from .tagger import Tagger -from .syntax.stateclass cimport StateClass -from .gold cimport GoldParse from .morphology cimport Morphology from .vocab cimport Vocab from .syntax import nonproj from .compat import json_dumps -from .attrs import ID, LOWER, PREFIX, SUFFIX, SHAPE, TAG, DEP, POS -from ._ml import Tok2Vec, flatten -from ._ml import build_text_classifier, build_tagger_model -from ._ml import link_vectors_to_models +from .attrs import POS from .parts_of_speech import X +from ._ml import Tok2Vec, build_text_classifier, build_tagger_model +from ._ml import link_vectors_to_models +from . import util class SentenceSegmenter(object): """A simple spaCy hook, to allow custom sentence boundary detection logic - (that doesn't require the dependency parse). - - To change the sentence boundary detection strategy, pass a generator - function `strategy` on initialization, or assign a new strategy to - the .strategy attribute. - + (that doesn't require the dependency parse). To change the sentence + boundary detection strategy, pass a generator function `strategy` on + initialization, or assign a new strategy to the .strategy attribute. Sentence detection strategies should be generators that take `Doc` objects and yield `Span` objects for each sentence. """ @@ -74,16 +59,20 @@ class SentenceSegmenter(object): seen_period = False for i, word in enumerate(doc): if seen_period and not word.is_punct: - yield doc[start : word.i] + yield doc[start:word.i] start = word.i seen_period = False elif word.text in ['.', '!', '?']: seen_period = True if start < len(doc): - yield doc[start : len(doc)] + yield doc[start:len(doc)] class Pipe(object): + """This class is not instantiated directly. Components inherit from it, and + it defines the interface that components should follow to function as + components in a spaCy analysis pipeline. + """ name = None @classmethod @@ -149,8 +138,7 @@ class Pipe(object): link_vectors_to_models(self.vocab) def use_params(self, params): - """Modify the pipe's model, to use the given parameter values. - """ + """Modify the pipe's model, to use the given parameter values.""" with self.model.use_params(params): yield @@ -235,8 +223,8 @@ class Tensorizer(Pipe): """Construct a new statistical model. Weights are not allocated on initialisation. - vocab (Vocab): A `Vocab` instance. The model must share the same `Vocab` - instance with the `Doc` objects it will process. + vocab (Vocab): A `Vocab` instance. The model must share the same + `Vocab` instance with the `Doc` objects it will process. model (Model): A `Model` instance or `True` allocate one later. **cfg: Config parameters. @@ -280,7 +268,7 @@ class Tensorizer(Pipe): """Return a single tensor for a batch of documents. docs (iterable): A sequence of `Doc` objects. - RETURNS (object): Vector representations for each token in the documents. + RETURNS (object): Vector representations for each token in the docs. """ tokvecs = self.model(docs) return tokvecs @@ -289,7 +277,7 @@ class Tensorizer(Pipe): """Set the tensor attribute for a batch of documents. docs (iterable): A sequence of `Doc` objects. 
- tokvecs (object): Vector representation for each token in the documents. + tokvecs (object): Vector representation for each token in the docs. """ for doc, tokvecs in zip(docs, tokvecses): assert tokvecs.shape[0] == len(doc) @@ -328,12 +316,14 @@ class Tensorizer(Pipe): class Tagger(Pipe): name = 'tagger' + def __init__(self, vocab, model=True, **cfg): self.vocab = vocab self.model = model self.cfg = dict(cfg) self.cfg.setdefault('cnn_maxout_pieces', 2) - self.cfg.setdefault('pretrained_dims', self.vocab.vectors.data.shape[1]) + self.cfg.setdefault('pretrained_dims', + self.vocab.vectors.data.shape[1]) def __call__(self, doc): tags = self.predict([doc]) @@ -353,8 +343,7 @@ class Tagger(Pipe): guesses = scores.argmax(axis=1) if not isinstance(guesses, numpy.ndarray): guesses = guesses.get() - guesses = self.model.ops.unflatten(guesses, - [len(d) for d in docs]) + guesses = self.model.ops.unflatten(guesses, [len(d) for d in docs]) return guesses def set_annotations(self, docs, batch_tag_ids): @@ -387,8 +376,8 @@ class Tagger(Pipe): def get_loss(self, docs, golds, scores): scores = self.model.ops.flatten(scores) - tag_index = {tag: i for i, tag in enumerate(self.vocab.morphology.tag_names)} - + tag_index = {tag: i + for i, tag in enumerate(self.vocab.morphology.tag_names)} cdef int idx = 0 correct = numpy.zeros((scores.shape[0],), dtype='i') guesses = scores.argmax(axis=1) @@ -443,17 +432,18 @@ class Tagger(Pipe): serialize['model'] = self.model.to_bytes serialize['vocab'] = self.vocab.to_bytes - serialize['tag_map'] = lambda: msgpack.dumps(self.vocab.morphology.tag_map, - use_bin_type=True, - encoding='utf8') + serialize['tag_map'] = lambda: msgpack.dumps( + self.vocab.morphology.tag_map, use_bin_type=True, encoding='utf8') return util.to_bytes(serialize, exclude) def from_bytes(self, bytes_data, **exclude): def load_model(b): if self.model is True: - token_vector_width = util.env_opt('token_vector_width', - self.cfg.get('token_vector_width', 128)) - self.model = self.Model(self.vocab.morphology.n_tags, **self.cfg) + token_vector_width = util.env_opt( + 'token_vector_width', + self.cfg.get('token_vector_width', 128)) + self.model = self.Model(self.vocab.morphology.n_tags, + **self.cfg) self.model.from_bytes(b) def load_tag_map(b): @@ -509,11 +499,11 @@ class Tagger(Pipe): class MultitaskObjective(Tagger): - '''Assist training of a parser or tagger, by training a side-objective. - - Experimental - ''' + """Experimental: Assist training of a parser or tagger, by training a + side-objective. + """ name = 'nn_labeller' + def __init__(self, vocab, model=True, target='dep_tag_offset', **cfg): self.vocab = vocab self.model = model @@ -530,12 +520,12 @@ class MultitaskObjective(Tagger): elif hasattr(target, '__call__'): self.make_label = target else: - raise ValueError( - "MultitaskObjective target should be function or one of " - "['dep', 'tag', 'ent', 'dep_tag_offset', 'ent_tag']") + raise ValueError("MultitaskObjective target should be function or " + "one of: dep, tag, ent, dep_tag_offset, ent_tag.") self.cfg = dict(cfg) self.cfg.setdefault('cnn_maxout_pieces', 2) - self.cfg.setdefault('pretrained_dims', self.vocab.vectors.data.shape[1]) + self.cfg.setdefault('pretrained_dims', + self.vocab.vectors.data.shape[1]) @property def labels(self): @@ -623,20 +613,19 @@ class MultitaskObjective(Tagger): class SimilarityHook(Pipe): """ - Experimental + Experimental: A pipeline component to install a hook for supervised + similarity into `Doc` objects. Requires a `Tensorizer` to pre-process + documents. 
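
The Cauchy-like similarity quoted in the docstring that continues just below is simple enough to restate in plain numpy. This is only a reference sketch with made-up vectors, not the pipeline component itself, which wraps the same idea in a Thinc model over document tensors:

import numpy

def cauchy_similarity(vec1, vec2, W):
    return 1. / (1. + (W * (vec1 - vec2) ** 2).sum())

vec1 = numpy.array([0.1, 0.4, 0.2])
vec2 = numpy.array([0.1, 0.3, 0.5])
W = numpy.ones((3,))                       # dimension weights, initialised to 1
print(cauchy_similarity(vec1, vec2, W))    # a value in (0, 1]
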
The similarity model can be any object obeying the Thinc `Model` + interface. By default, the model concatenates the elementwise mean and + elementwise max of the two tensors, and compares them using the + Cauchy-like similarity function from Chen (2013): - A pipeline component to install a hook for supervised similarity into - Doc objects. Requires a Tensorizer to pre-process documents. The similarity - model can be any object obeying the Thinc Model interface. By default, - the model concatenates the elementwise mean and elementwise max of the two - tensors, and compares them using the Cauchy-like similarity function - from Chen (2013): - - similarity = 1. / (1. + (W * (vec1-vec2)**2).sum()) + >>> similarity = 1. / (1. + (W * (vec1-vec2)**2).sum()) Where W is a vector of dimension weights, initialized to 1. """ name = 'similarity' + def __init__(self, vocab, model=True, **cfg): self.vocab = vocab self.model = model @@ -662,8 +651,7 @@ class SimilarityHook(Pipe): sims, bp_sims = self.model.begin_update(doc1_doc2, drop=drop) def begin_training(self, _=tuple(), pipeline=None): - """ - Allocate model, using width from tensorizer in pipeline. + """Allocate model, using width from tensorizer in pipeline. gold_tuples (iterable): Gold-standard training data. pipeline (list): The pipeline the model is part of. @@ -763,12 +751,14 @@ cdef class DependencyParser(Parser): for target in []: labeller = MultitaskObjective(self.vocab, target=target) tok2vec = self.model[0] - labeller.begin_training(gold_tuples, pipeline=pipeline, tok2vec=tok2vec) + labeller.begin_training(gold_tuples, pipeline=pipeline, + tok2vec=tok2vec) pipeline.append(labeller) self._multitasks.append(labeller) def __reduce__(self): - return (DependencyParser, (self.vocab, self.moves, self.model), None, None) + return (DependencyParser, (self.vocab, self.moves, self.model), + None, None) cdef class EntityRecognizer(Parser): @@ -781,12 +771,14 @@ cdef class EntityRecognizer(Parser): for target in []: labeller = MultitaskObjective(self.vocab, target=target) tok2vec = self.model[0] - labeller.begin_training(gold_tuples, pipeline=pipeline, tok2vec=tok2vec) + labeller.begin_training(gold_tuples, pipeline=pipeline, + tok2vec=tok2vec) pipeline.append(labeller) self._multitasks.append(labeller) def __reduce__(self): - return (EntityRecognizer, (self.vocab, self.moves, self.model), None, None) + return (EntityRecognizer, (self.vocab, self.moves, self.model), + None, None) __all__ = ['Tagger', 'DependencyParser', 'EntityRecognizer', 'Tensorizer'] diff --git a/spacy/scorer.py b/spacy/scorer.py index b1ce3faa4..673df132c 100644 --- a/spacy/scorer.py +++ b/spacy/scorer.py @@ -74,18 +74,21 @@ class Scorer(object): @property def scores(self): return { - 'uas': self.uas, 'las': self.las, - 'ents_p': self.ents_p, 'ents_r': self.ents_r, 'ents_f': self.ents_f, + 'uas': self.uas, + 'las': self.las, + 'ents_p': self.ents_p, + 'ents_r': self.ents_r, + 'ents_f': self.ents_f, 'tags_acc': self.tags_acc, 'token_acc': self.token_acc } def score(self, tokens, gold, verbose=False, punct_labels=('p', 'punct')): assert len(tokens) == len(gold) - gold_deps = set() gold_tags = set() - gold_ents = set(tags_to_entities([annot[-1] for annot in gold.orig_annot])) + gold_ents = set(tags_to_entities([annot[-1] + for annot in gold.orig_annot])) for id_, word, tag, head, dep, ner in gold.orig_annot: gold_tags.add((id_, tag)) if dep not in (None, "") and dep.lower() not in punct_labels: diff --git a/spacy/strings.pyx b/spacy/strings.pyx index e6926a75d..647f140bb 100644 --- 
a/spacy/strings.pyx +++ b/spacy/strings.pyx @@ -4,19 +4,15 @@ from __future__ import unicode_literals, absolute_import cimport cython from libc.string cimport memcpy -from libc.stdint cimport uint64_t, uint32_t -from murmurhash.mrmr cimport hash64, hash32 -from preshed.maps cimport map_iter, key_t from libc.stdint cimport uint32_t +from murmurhash.mrmr cimport hash64, hash32 import ujson -import dill from .symbols import IDS as SYMBOLS_BY_STR from .symbols import NAMES as SYMBOLS_BY_INT - from .typedefs cimport hash_t -from . import util from .compat import json_dumps +from . import util cpdef hash_t hash_string(unicode string) except 0: @@ -195,7 +191,7 @@ cdef class StringStore: """Save the current state to a directory. path (unicode or Path): A path to a directory, which will be created if - it doesn't exist. Paths may be either strings or `Path`-like objects. + it doesn't exist. Paths may be either strings or Path-like objects. """ path = util.ensure_path(path) strings = list(self) @@ -225,7 +221,7 @@ cdef class StringStore: **exclude: Named attributes to prevent from being serialized. RETURNS (bytes): The serialized form of the `StringStore` object. """ - return ujson.dumps(list(self)) + return json_dumps(list(self)) def from_bytes(self, bytes_data, **exclude): """Load state from a binary string. diff --git a/spacy/symbols.pyx b/spacy/symbols.pyx index 0e0337b6e..56422771a 100644 --- a/spacy/symbols.pyx +++ b/spacy/symbols.pyx @@ -1,8 +1,8 @@ # coding: utf8 #cython: optimize.unpack_method_calls=False - from __future__ import unicode_literals + IDS = { "": NIL, "IS_ALPHA": IS_ALPHA, @@ -464,9 +464,11 @@ IDS = { "LAW": LAW } + def sort_nums(x): return x[1] + NAMES = [it[0] for it in sorted(IDS.items(), key=sort_nums)] # Unfortunate hack here, to work around problem with long cpdef enum # (which is generating an enormous amount of C++ in Cython 0.24+) diff --git a/spacy/syntax/_beam_utils.pyx b/spacy/syntax/_beam_utils.pyx index da4efefbc..54e72a0e8 100644 --- a/spacy/syntax/_beam_utils.pyx +++ b/spacy/syntax/_beam_utils.pyx @@ -2,7 +2,7 @@ # cython: profile=True cimport numpy as np import numpy -from cpython.ref cimport PyObject, Py_INCREF, Py_XDECREF +from cpython.ref cimport PyObject, Py_XDECREF from thinc.extra.search cimport Beam from thinc.extra.search import MaxViolation from thinc.typedefs cimport hash_t, class_t @@ -11,7 +11,6 @@ from thinc.extra.search cimport MaxViolation from .transition_system cimport TransitionSystem, Transition from .stateclass cimport StateClass from ..gold cimport GoldParse -from ..tokens.doc cimport Doc # These are passed as callbacks to thinc.search.Beam @@ -50,7 +49,7 @@ cdef class ParserBeam(object): cdef public object dones def __init__(self, TransitionSystem moves, states, golds, - int width, float density): + int width, float density): self.moves = moves self.states = states self.golds = golds @@ -59,7 +58,8 @@ cdef class ParserBeam(object): cdef StateClass state, st for state in states: beam = Beam(self.moves.n_moves, width, density) - beam.initialize(self.moves.init_beam_state, state.c.length, state.c._sent) + beam.initialize(self.moves.init_beam_state, state.c.length, + state.c._sent) for i in range(beam.width): st = beam.at(i) st.c.offset = state.c.offset @@ -74,7 +74,8 @@ cdef class ParserBeam(object): @property def is_done(self): - return all(b.is_done or self.dones[i] for i, b in enumerate(self.beams)) + return all(b.is_done or self.dones[i] + for i, b in enumerate(self.beams)) def __getitem__(self, i): return self.beams[i] @@ -126,7 +127,8 
@@ cdef class ParserBeam(object): for i in range(beam.size): state = beam.at(i) if not state.c.is_final(): - self.moves.set_costs(beam.is_valid[i], beam.costs[i], state, gold) + self.moves.set_costs(beam.is_valid[i], beam.costs[i], + state, gold) if follow_gold: for j in range(beam.nr_class): if beam.costs[i][j] >= 1: @@ -146,7 +148,10 @@ def get_token_ids(states, int n_tokens): c_ids += ids.shape[1] return ids + nr_update = 0 + + def update_beam(TransitionSystem moves, int nr_feature, int max_steps, states, golds, state2vec, vec2scores, @@ -167,23 +172,27 @@ def update_beam(TransitionSystem moves, int nr_feature, int max_steps, if pbeam.is_done and gbeam.is_done: break # The beam maps let us find the right row in the flattened scores - # arrays for each state. States are identified by (example id, history). - # We keep a different beam map for each step (since we'll have a flat - # scores array for each step). The beam map will let us take the per-state - # losses, and compute the gradient for each (step, state, class). + # arrays for each state. States are identified by (example id, + # history). We keep a different beam map for each step (since we'll + # have a flat scores array for each step). The beam map will let us + # take the per-state losses, and compute the gradient for each (step, + # state, class). beam_maps.append({}) # Gather all states from the two beams in a list. Some stats may occur # in both beams. To figure out which beam each state belonged to, # we keep two lists of indices, p_indices and g_indices - states, p_indices, g_indices = get_states(pbeam, gbeam, beam_maps[-1], nr_update) + states, p_indices, g_indices = get_states(pbeam, gbeam, beam_maps[-1], + nr_update) if not states: break # Now that we have our flat list of states, feed them through the model token_ids = get_token_ids(states, nr_feature) vectors, bp_vectors = state2vec.begin_update(token_ids, drop=drop) if hist_feats: - hists = numpy.asarray([st.history[:hist_feats] for st in states], dtype='i') - scores, bp_scores = vec2scores.begin_update((vectors, hists), drop=drop) + hists = numpy.asarray([st.history[:hist_feats] for st in states], + dtype='i') + scores, bp_scores = vec2scores.begin_update((vectors, hists), + drop=drop) else: scores, bp_scores = vec2scores.begin_update(vectors, drop=drop) @@ -192,8 +201,10 @@ def update_beam(TransitionSystem moves, int nr_feature, int max_steps, # Unpack the flat scores into lists for the two beams. The indices arrays # tell us which example and state the scores-row refers to. - p_scores = [numpy.ascontiguousarray(scores[indices], dtype='f') for indices in p_indices] - g_scores = [numpy.ascontiguousarray(scores[indices], dtype='f') for indices in g_indices] + p_scores = [numpy.ascontiguousarray(scores[indices], dtype='f') + for indices in p_indices] + g_scores = [numpy.ascontiguousarray(scores[indices], dtype='f') + for indices in g_indices] # Now advance the states in the beams. The gold beam is contrained to # to follow only gold analyses. pbeam.advance(p_scores) @@ -249,8 +260,7 @@ def get_states(pbeams, gbeams, beam_map, nr_update): def get_gradient(nr_class, beam_maps, histories, losses): - """ - The global model assigns a loss to each parse. The beam scores + """The global model assigns a loss to each parse. The beam scores are additive, so the same gradient is applied to each action in the history. 
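
Before the docstring continues below, a stripped-down, framework-free sketch of that accumulation: each parse's loss is added to every (step, action) row of its history. The real `get_gradient` additionally routes rows through the per-step beam maps and skips zero or NaN losses; the class count and histories here are made up.

import numpy

def toy_beam_gradient(nr_class, histories, losses):
    """Spread each parse's loss over every (step, action) in its history."""
    nr_step = max(len(hist) for hist in histories)
    grads = [numpy.zeros((len(histories), nr_class), dtype='f')
             for _ in range(nr_step)]
    for row, (hist, loss) in enumerate(zip(histories, losses)):
        for step, action in enumerate(hist):
            grads[step][row, action] += loss    # same loss for every action taken
    return grads

# two candidate parses (action-id histories) with their losses
print(toy_beam_gradient(4, histories=[[0, 2, 1], [3, 1]], losses=[0.5, -0.5]))
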
This gives the gradient of a single *action* for a beam state -- so we have "the gradient of loss for taking @@ -270,7 +280,8 @@ def get_gradient(nr_class, beam_maps, histories, losses): if loss != 0.0 and not numpy.isnan(loss): nr_step = max(nr_step, len(hist)) for i in range(nr_step): - grads.append(numpy.zeros((max(beam_maps[i].values())+1, nr_class), dtype='f')) + grads.append(numpy.zeros((max(beam_maps[i].values())+1, nr_class), + dtype='f')) assert len(histories) == len(losses) for eg_id, hists in enumerate(histories): for loss, hist in zip(losses[eg_id], hists): @@ -287,5 +298,3 @@ def get_gradient(nr_class, beam_maps, histories, losses): grads[j][i, clas] += loss key = key + tuple([clas]) return grads - - diff --git a/spacy/syntax/_state.pyx b/spacy/syntax/_state.pyx index 83c831f0b..e69de29bb 100644 --- a/spacy/syntax/_state.pyx +++ b/spacy/syntax/_state.pyx @@ -1 +0,0 @@ -# test diff --git a/spacy/syntax/arc_eager.pyx b/spacy/syntax/arc_eager.pyx index 8adb8e52c..b3c9b5563 100644 --- a/spacy/syntax/arc_eager.pyx +++ b/spacy/syntax/arc_eager.pyx @@ -4,24 +4,16 @@ # coding: utf-8 from __future__ import unicode_literals -from cpython.ref cimport PyObject, Py_INCREF, Py_XDECREF -import ctypes -from libc.stdint cimport uint32_t -from libc.string cimport memcpy +from cpython.ref cimport Py_INCREF from cymem.cymem cimport Pool from collections import OrderedDict from thinc.extra.search cimport Beam -import numpy from .stateclass cimport StateClass -from ._state cimport StateC, is_space_token +from ._state cimport StateC from .nonproj import is_nonproj_tree -from .transition_system cimport do_func_t, get_cost_func_t from .transition_system cimport move_cost_func_t, label_cost_func_t -from ..gold cimport GoldParse -from ..gold cimport GoldParseC -from ..attrs cimport TAG, HEAD, DEP, ENT_IOB, ENT_TYPE, IS_SPACE, IS_PUNCT -from ..lexeme cimport Lexeme +from ..gold cimport GoldParse, GoldParseC from ..structs cimport TokenC @@ -316,14 +308,13 @@ cdef class ArcEager(TransitionSystem): @classmethod def get_actions(cls, **kwargs): - actions = kwargs.get('actions', - OrderedDict(( - (SHIFT, ['']), - (REDUCE, ['']), - (RIGHT, []), - (LEFT, []), - (BREAK, ['ROOT']) - ))) + actions = kwargs.get('actions', OrderedDict(( + (SHIFT, ['']), + (REDUCE, ['']), + (RIGHT, []), + (LEFT, []), + (BREAK, ['ROOT'])) + )) seen_actions = set() for label in kwargs.get('left_labels', []): if label.upper() != 'ROOT': @@ -363,7 +354,8 @@ cdef class ArcEager(TransitionSystem): if gold.cand_to_gold[i] is None: continue if state.safe_get(i).dep: - predicted.add((i, state.H(i), self.strings[state.safe_get(i).dep])) + predicted.add((i, state.H(i), + self.strings[state.safe_get(i).dep])) else: predicted.add((i, state.H(i), 'ROOT')) id_, word, tag, head, dep, ner = gold.orig_annot[gold.cand_to_gold[i]] @@ -381,7 +373,8 @@ cdef class ArcEager(TransitionSystem): if not self.has_gold(gold): return None for i in range(gold.length): - if gold.heads[i] is None or gold.labels[i] is None: # Missing values + # Missing values + if gold.heads[i] is None or gold.labels[i] is None: gold.c.heads[i] = i gold.c.has_dep[i] = False else: @@ -517,14 +510,15 @@ cdef class ArcEager(TransitionSystem): # Check projectivity --- leading cause if is_nonproj_tree(gold.heads): raise ValueError( - "Could not find a gold-standard action to supervise the dependency " - "parser.\n" - "Likely cause: the tree is non-projective (i.e. 
it has crossing " - "arcs -- see spacy/syntax/nonproj.pyx for definitions)\n" - "The ArcEager transition system only supports projective trees.\n" - "To learn non-projective representations, transform the data " - "before training and after parsing. Either pass make_projective=True " - "to the GoldParse class, or use PseudoProjectivity.preprocess_training_data") + "Could not find a gold-standard action to supervise the " + "dependency parser. Likely cause: the tree is " + "non-projective (i.e. it has crossing arcs -- see " + "spacy/syntax/nonproj.pyx for definitions). The ArcEager " + "transition system only supports projective trees. To " + "learn non-projective representations, transform the data " + "before training and after parsing. Either pass " + "make_projective=True to the GoldParse class, or use " + "spacy.syntax.nonproj.preprocess_training_data.") else: print(gold.orig_annot) print(gold.words) @@ -532,12 +526,10 @@ cdef class ArcEager(TransitionSystem): print(gold.labels) print(gold.sent_starts) raise ValueError( - "Could not find a gold-standard action to supervise the dependency " - "parser.\n" - "The GoldParse was projective.\n" - "The transition system has %d actions.\n" - "State at failure:\n" - "%s" % (self.n_moves, stcls.print_state(gold.words))) + "Could not find a gold-standard action to supervise the" + "dependency parser. The GoldParse was projective. The " + "transition system has %d actions. State at failure: %s" + % (self.n_moves, stcls.print_state(gold.words))) assert n_gold >= 1 def get_beam_annot(self, Beam beam): @@ -558,4 +550,3 @@ cdef class ArcEager(TransitionSystem): deps[j].setdefault(dep, 0.0) deps[j][dep] += prob return heads, deps - diff --git a/spacy/syntax/iterators.pxd b/spacy/syntax/iterators.pxd deleted file mode 100644 index e69de29bb..000000000 diff --git a/spacy/syntax/iterators.pyx b/spacy/syntax/iterators.pyx deleted file mode 100644 index 557616d18..000000000 --- a/spacy/syntax/iterators.pyx +++ /dev/null @@ -1,144 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from ..parts_of_speech cimport NOUN, PROPN, PRON, VERB, AUX - - -def english_noun_chunks(obj): - """ - Detect base noun phrases from a dependency parse. - Works on both Doc and Span. - """ - labels = ['nsubj', 'dobj', 'nsubjpass', 'pcomp', 'pobj', - 'attr', 'ROOT'] - doc = obj.doc # Ensure works on both Doc and Span. 
- np_deps = [doc.vocab.strings.add(label) for label in labels] - conj = doc.vocab.strings.add('conj') - np_label = doc.vocab.strings.add('NP') - seen = set() - for i, word in enumerate(obj): - if word.pos not in (NOUN, PROPN, PRON): - continue - # Prevent nested chunks from being produced - if word.i in seen: - continue - if word.dep in np_deps: - if any(w.i in seen for w in word.subtree): - continue - seen.update(j for j in range(word.left_edge.i, word.i+1)) - yield word.left_edge.i, word.i+1, np_label - elif word.dep == conj: - head = word.head - while head.dep == conj and head.head.i < head.i: - head = head.head - # If the head is an NP, and we're coordinated to it, we're an NP - if head.dep in np_deps: - if any(w.i in seen for w in word.subtree): - continue - seen.update(j for j in range(word.left_edge.i, word.i+1)) - yield word.left_edge.i, word.i+1, np_label - - -# this iterator extracts spans headed by NOUNs starting from the left-most -# syntactic dependent until the NOUN itself -# for close apposition and measurement construction, the span is sometimes -# extended to the right of the NOUN -# example: "eine Tasse Tee" (a cup (of) tea) returns "eine Tasse Tee" and not -# just "eine Tasse", same for "das Thema Familie" -def german_noun_chunks(obj): - labels = ['sb', 'oa', 'da', 'nk', 'mo', 'ag', 'ROOT', 'root', 'cj', 'pd', 'og', 'app'] - doc = obj.doc # Ensure works on both Doc and Span. - np_label = doc.vocab.strings.add('NP') - np_deps = set(doc.vocab.strings.add(label) for label in labels) - close_app = doc.vocab.strings.add('nk') - - rbracket = 0 - for i, word in enumerate(obj): - if i < rbracket: - continue - if word.pos in (NOUN, PROPN, PRON) and word.dep in np_deps: - rbracket = word.i+1 - # try to extend the span to the right - # to capture close apposition/measurement constructions - for rdep in doc[word.i].rights: - if rdep.pos in (NOUN, PROPN) and rdep.dep == close_app: - rbracket = rdep.i+1 - yield word.left_edge.i, rbracket, np_label - - -def es_noun_chunks(obj): - doc = obj.doc - np_label = doc.vocab.strings['NP'] - left_labels = ['det', 'fixed', 'neg'] #['nunmod', 'det', 'appos', 'fixed'] - right_labels = ['flat', 'fixed', 'compound', 'neg'] - stop_labels = ['punct'] - np_left_deps = [doc.vocab.strings[label] for label in left_labels] - np_right_deps = [doc.vocab.strings[label] for label in right_labels] - stop_deps = [doc.vocab.strings[label] for label in stop_labels] - - def next_token(token): - try: - return token.nbor() - except: - return None - - def noun_bounds(root): - def is_verb_token(token): - return token.pos in [VERB, AUX] - - left_bound = root - for token in reversed(list(root.lefts)): - if token.dep in np_left_deps: - left_bound = token - right_bound = root - for token in root.rights: - if (token.dep in np_right_deps): - left, right = noun_bounds(token) - if list(filter(lambda t: is_verb_token(t) or t.dep in stop_deps, - doc[left_bound.i: right.i])): - break - else: - right_bound = right - return left_bound, right_bound - - token = doc[0] - while token and token.i < len(doc): - if token.pos in [PROPN, NOUN, PRON]: - left, right = noun_bounds(token) - yield left.i, right.i+1, np_label - token = right - token = next_token(token) - - -def french_noun_chunks(obj): - labels = ['nsubj', 'nsubj:pass', 'obj', 'iobj', 'ROOT', 'appos', 'nmod', 'nmod:poss'] - doc = obj.doc # Ensure works on both Doc and Span. 
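
The per-language chunker functions being removed in this hunk (the French variant continues just below) implement the logic behind `Doc.noun_chunks`: each yields `(start, end, label)` triples that become `Span` objects labelled `NP`. For reference, the user-facing behaviour, assuming the `nlp` pipeline from the earlier examples includes a dependency parser:

doc = nlp(u'Autonomous cars shift insurance liability toward manufacturers.')
for chunk in doc.noun_chunks:
    # each chunk is a Span labelled 'NP', rooted at the phrase head
    print(chunk.text, chunk.label_, chunk.root.dep_)
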
- np_deps = [doc.vocab.strings[label] for label in labels] - conj = doc.vocab.strings.add('conj') - np_label = doc.vocab.strings.add('NP') - seen = set() - for i, word in enumerate(obj): - if word.pos not in (NOUN, PROPN, PRON): - continue - # Prevent nested chunks from being produced - if word.i in seen: - continue - if word.dep in np_deps: - if any(w.i in seen for w in word.subtree): - continue - seen.update(j for j in range(word.left_edge.i, word.right_edge.i+1)) - yield word.left_edge.i, word.right_edge.i+1, np_label - elif word.dep == conj: - head = word.head - while head.dep == conj and head.head.i < head.i: - head = head.head - # If the head is an NP, and we're coordinated to it, we're an NP - if head.dep in np_deps: - if any(w.i in seen for w in word.subtree): - continue - seen.update(j for j in range(word.left_edge.i, word.right_edge.i+1)) - yield word.left_edge.i, word.right_edge.i+1, np_label - - -CHUNKERS = {'en': english_noun_chunks, 'de': german_noun_chunks, - 'es': es_noun_chunks, 'fr': french_noun_chunks} diff --git a/spacy/syntax/ner.pyx b/spacy/syntax/ner.pyx index 5c4e42176..e2e242aea 100644 --- a/spacy/syntax/ner.pyx +++ b/spacy/syntax/ner.pyx @@ -4,17 +4,12 @@ from __future__ import unicode_literals from thinc.typedefs cimport weight_t from thinc.extra.search cimport Beam from collections import OrderedDict -import numpy -from thinc.neural.ops import NumpyOps from .stateclass cimport StateClass from ._state cimport StateC from .transition_system cimport Transition from .transition_system cimport do_func_t -from ..structs cimport TokenC, Entity -from ..gold cimport GoldParseC -from ..gold cimport GoldParse -from ..attrs cimport ENT_TYPE, ENT_IOB +from ..gold cimport GoldParseC, GoldParse cdef enum: @@ -69,15 +64,14 @@ cdef class BiluoPushDown(TransitionSystem): @classmethod def get_actions(cls, **kwargs): - actions = kwargs.get('actions', - OrderedDict(( - (MISSING, ['']), - (BEGIN, []), - (IN, []), - (LAST, []), - (UNIT, []), - (OUT, ['']) - ))) + actions = kwargs.get('actions', OrderedDict(( + (MISSING, ['']), + (BEGIN, []), + (IN, []), + (LAST, []), + (UNIT, []), + (OUT, ['']) + ))) seen_entities = set() for entity_type in kwargs.get('entity_types', []): if entity_type in seen_entities: @@ -160,7 +154,7 @@ cdef class BiluoPushDown(TransitionSystem): cdef Transition lookup_transition(self, object name) except *: cdef attr_t label - if name == '-' or name == None: + if name == '-' or name is None: return Transition(clas=0, move=MISSING, label=0, score=0) elif name == '!O': return Transition(clas=0, move=ISNT, label=0, score=0) @@ -328,8 +322,8 @@ cdef class In: return False elif preset_ent_iob == 3: return False - # TODO: Is this quite right? - # I think it's supposed to be ensuring the gazetteer matches are maintained + # TODO: Is this quite right? 
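The moves above encode the BILUO tagging scheme: Begin, In and Last for multi-token entities, Unit for single-token entities and Out for non-entity tokens. A generic sketch of the scheme itself (not spaCy's own conversion helper), mapping token-level entity offsets to BILUO tags:

def biluo_tags(n_tokens, entities):
    # entities: list of (start_token, end_token_exclusive, label) triples
    tags = ['O'] * n_tokens
    for start, end, label in entities:
        if end - start == 1:
            tags[start] = 'U-' + label
        else:
            tags[start] = 'B-' + label
            for i in range(start + 1, end - 1):
                tags[i] = 'I-' + label
            tags[end - 1] = 'L-' + label
    return tags

print(biluo_tags(6, [(1, 3, 'PERSON'), (4, 5, 'GPE')]))
# ['O', 'B-PERSON', 'L-PERSON', 'O', 'U-GPE', 'O']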
I think it's supposed to be ensuring the + # gazetteer matches are maintained elif st.B_(1).ent_iob != preset_ent_iob: return False # Don't allow entities to extend across sentence boundaries @@ -354,10 +348,12 @@ cdef class In: if g_act == MISSING: return 0 elif g_act == BEGIN: - # I, Gold B --> True (P of bad open entity sunk, R of this entity sunk) + # I, Gold B --> True + # (P of bad open entity sunk, R of this entity sunk) return 0 elif g_act == IN: - # I, Gold I --> True (label forced by prev, if mismatch, P and R both sunk) + # I, Gold I --> True + # (label forced by prev, if mismatch, P and R both sunk) return 0 elif g_act == LAST: # I, Gold L --> True iff this entity sunk and next tag == O @@ -505,11 +501,3 @@ cdef class Out: return 1 else: return 1 - - -class OracleError(Exception): - pass - - -class UnknownMove(Exception): - pass diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx index 84b8e348f..1aa4443d0 100644 --- a/spacy/syntax/nn_parser.pyx +++ b/spacy/syntax/nn_parser.pyx @@ -4,79 +4,56 @@ # coding: utf-8 from __future__ import unicode_literals, print_function -from collections import Counter, OrderedDict +from collections import OrderedDict import ujson import json -import contextlib import numpy - -from libc.math cimport exp -cimport cython cimport cython.parallel import cytoolz -import dill - import numpy.random cimport numpy as np - -from libcpp.vector cimport vector -from cpython.ref cimport PyObject, Py_INCREF, Py_XDECREF +from cpython.ref cimport PyObject, Py_XDECREF from cpython.exc cimport PyErr_CheckSignals, PyErr_SetFromErrno -from libc.stdint cimport uint32_t, uint64_t -from libc.string cimport memset, memcpy -from libc.stdlib cimport malloc, calloc, free -from thinc.typedefs cimport weight_t, class_t, feat_t, atom_t, hash_t -from thinc.linear.avgtron cimport AveragedPerceptron -from thinc.linalg cimport Vec, VecVec -from thinc.structs cimport SparseArrayC, FeatureC, ExampleC -from thinc.extra.eg cimport Example +from libc.math cimport exp +from libcpp.vector cimport vector +from libc.string cimport memset +from libc.stdlib cimport calloc, free +from cymem.cymem cimport Pool +from thinc.typedefs cimport weight_t, class_t, hash_t from thinc.extra.search cimport Beam - -from cymem.cymem cimport Pool, Address -from murmurhash.mrmr cimport hash64 -from preshed.maps cimport MapStruct -from preshed.maps cimport map_get - -from thinc.api import layerize, chain, clone, with_flatten -from thinc.v2v import Model, Maxout, Softmax, Affine, ReLu, SELU +from thinc.api import chain, clone +from thinc.v2v import Model, Maxout, Affine from thinc.misc import LayerNorm - -from thinc.neural.ops import NumpyOps, CupyOps +from thinc.neural.ops import CupyOps from thinc.neural.util import get_array_module +from thinc.linalg cimport Vec, VecVec -from .. import util -from ..util import get_async, get_cuda_stream -from .._ml import zero_init, PrecomputableAffine -from .._ml import Tok2Vec, doc2feats -from .._ml import Residual, drop_layer, flatten +from .._ml import zero_init, PrecomputableAffine, Tok2Vec, flatten from .._ml import link_vectors_to_models -from .._ml import HistoryFeatures from ..compat import json_dumps, copy_array - +from ..tokens.doc cimport Doc +from ..gold cimport GoldParse +from .. import util from .stateclass cimport StateClass from ._state cimport StateC -from . 
import nonproj -from .transition_system import OracleError -from .transition_system cimport TransitionSystem, Transition -from ..structs cimport TokenC -from ..tokens.doc cimport Doc -from ..strings cimport StringStore -from ..gold cimport GoldParse -from ..attrs cimport ID, TAG, DEP, ORTH, NORM, PREFIX, SUFFIX, TAG -from . import _beam_utils +from .transition_system cimport Transition +from . import _beam_utils, nonproj def get_templates(*args, **kwargs): return [] + DEBUG = False + + def set_debug(val): global DEBUG DEBUG = val cdef class precompute_hiddens: - '''Allow a model to be "primed" by pre-computing input features in bulk. + """Allow a model to be "primed" by pre-computing input features in bulk. This is used for the parser, where we want to take a batch of documents, and compute vectors for each (token, position) pair. These vectors can then @@ -91,7 +68,7 @@ cdef class precompute_hiddens: so we can save the factor k. This also gives a nice CPU/GPU division: we can do all our hard maths up front, packed into large multiplications, and do the hard-to-program parsing on the CPU. - ''' + """ cdef int nF, nO, nP cdef bint _is_synchronized cdef public object ops @@ -101,7 +78,8 @@ cdef class precompute_hiddens: cdef object _cuda_stream cdef object _bp_hiddens - def __init__(self, batch_size, tokvecs, lower_model, cuda_stream=None, drop=0.): + def __init__(self, batch_size, tokvecs, lower_model, cuda_stream=None, + drop=0.): gpu_cached, bp_features = lower_model.begin_update(tokvecs, drop=drop) cdef np.ndarray cached if not isinstance(gpu_cached, numpy.ndarray): @@ -122,8 +100,7 @@ cdef class precompute_hiddens: self._bp_hiddens = bp_features cdef const float* get_feat_weights(self) except NULL: - if not self._is_synchronized \ - and self._cuda_stream is not None: + if not self._is_synchronized and self._cuda_stream is not None: self._cuda_stream.synchronize() self._is_synchronized = True return self._cached.data @@ -248,10 +225,10 @@ cdef class Parser: depth = util.env_opt('parser_hidden_depth', cfg.get('hidden_depth', 1)) if depth != 1: raise ValueError("Currently parser depth is hard-coded to 1.") - parser_maxout_pieces = util.env_opt('parser_maxout_pieces', cfg.get('maxout_pieces', 2)) - #if parser_maxout_pieces != 2: - # raise ValueError("Currently parser_maxout_pieces is hard-coded to 2") - token_vector_width = util.env_opt('token_vector_width', cfg.get('token_vector_width', 128)) + parser_maxout_pieces = util.env_opt('parser_maxout_pieces', + cfg.get('maxout_pieces', 2)) + token_vector_width = util.env_opt('token_vector_width', + cfg.get('token_vector_width', 128)) hidden_width = util.env_opt('hidden_width', cfg.get('hidden_width', 200)) embed_size = util.env_opt('embed_size', cfg.get('embed_size', 7000)) hist_size = util.env_opt('history_feats', cfg.get('hist_size', 0)) @@ -289,23 +266,19 @@ cdef class Parser: return (tok2vec, lower, upper), cfg def __init__(self, Vocab vocab, moves=True, model=True, **cfg): - """ - Create a Parser. + """Create a Parser. - Arguments: - vocab (Vocab): - The vocabulary object. Must be shared with documents to be processed. - The value is set to the .vocab attribute. - moves (TransitionSystem): - Defines how the parse-state is created, updated and evaluated. - The value is set to the .moves attribute unless True (default), - in which case a new instance is created with Parser.Moves(). - model (object): - Defines how the parse-state is created, updated and evaluated. 
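The precompute_hiddens docstring above describes a gather-and-sum trick: because the lower layer's weights are fixed while decoding, each token's contribution to every feature slot can be computed once for the whole batch, and scoring a parser state then reduces to summing a handful of cached rows. A standalone numpy sketch of that idea (shapes and names are illustrative, maxout pieces are omitted, and this is not the actual spaCy implementation):

import numpy as np

nF, nO, d, nr_tok = 13, 64, 128, 50     # feature slots, hidden units, vector width, tokens
W = np.random.randn(nF, d, nO)          # fixed lower-layer weights, one block per slot
tokvecs = np.random.randn(nr_tok, d)    # one vector per token in the batch

# Pre-compute every token's contribution to every feature slot once...
cached = np.einsum('td,fdo->tfo', tokvecs, W)          # (nr_tok, nF, nO)

# ...then a state's hidden layer is a gather-and-sum over its nF context tokens.
state_token_ids = np.random.randint(0, nr_tok, size=nF)
hidden = cached[state_token_ids, np.arange(nF)].sum(axis=0)

# Same result as multiplying the gathered token vectors through W directly.
direct = sum(tokvecs[t] @ W[f] for f, t in enumerate(state_token_ids))
assert np.allclose(hidden, direct)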
- The value is set to the .model attribute unless True (default), - in which case a new instance is created with Parser.Model(). - **cfg: - Arbitrary configuration parameters. Set to the .cfg attribute + vocab (Vocab): The vocabulary object. Must be shared with documents + to be processed. The value is set to the `.vocab` attribute. + moves (TransitionSystem): Defines how the parse-state is created, + updated and evaluated. The value is set to the .moves attribute + unless True (default), in which case a new instance is created with + `Parser.Moves()`. + model (object): Defines how the parse-state is created, updated and + evaluated. The value is set to the .model attribute unless True + (default), in which case a new instance is created with + `Parser.Model()`. + **cfg: Arbitrary configuration parameters. Set to the `.cfg` attribute """ self.vocab = vocab if moves is True: @@ -331,13 +304,10 @@ cdef class Parser: return (Parser, (self.vocab, self.moves, self.model), None, None) def __call__(self, Doc doc, beam_width=None, beam_density=None): - """ - Apply the parser or entity recognizer, setting the annotations onto the Doc object. + """Apply the parser or entity recognizer, setting the annotations onto + the `Doc` object. - Arguments: - doc (Doc): The document to be processed. - Returns: - None + doc (Doc): The document to be processed. """ if beam_width is None: beam_width = self.cfg.get('beam_width', 1) @@ -359,16 +329,13 @@ cdef class Parser: def pipe(self, docs, int batch_size=256, int n_threads=2, beam_width=None, beam_density=None): - """ - Process a stream of documents. + """Process a stream of documents. - Arguments: - stream: The sequence of documents to process. - batch_size (int): - The number of documents to accumulate into a working set. - n_threads (int): - The number of threads with which to work on the buffer in parallel. - Yields (Doc): Documents, in order. + stream: The sequence of documents to process. + batch_size (int): Number of documents to accumulate into a working set. + n_threads (int): The number of threads with which to work on the buffer + in parallel. + YIELDS (Doc): Documents, in order. 
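A minimal usage sketch for the pipe() method documented above, assuming `nlp`, `texts` and a loaded `parser` component exist (any other iterable of Doc objects works equally well as input):

docs = (nlp.make_doc(text) for text in texts)
for doc in parser.pipe(docs, batch_size=256, n_threads=2):
    print([(w.text, w.dep_, w.head.text) for w in doc])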
""" if beam_width is None: beam_width = self.cfg.get('beam_width', 1) @@ -385,8 +352,8 @@ cdef class Parser: parse_states = self.parse_batch(subbatch) beams = [] else: - beams = self.beam_parse(subbatch, - beam_width=beam_width, beam_density=beam_density) + beams = self.beam_parse(subbatch, beam_width=beam_width, + beam_density=beam_density) parse_states = [] for beam in beams: parse_states.append(beam.at(0)) @@ -406,9 +373,9 @@ cdef class Parser: if isinstance(docs, Doc): docs = [docs] - cuda_stream = get_cuda_stream() - (tokvecs, bp_tokvecs), state2vec, vec2scores = self.get_batch_model(docs, cuda_stream, - 0.0) + cuda_stream = util.get_cuda_stream() + (tokvecs, bp_tokvecs), state2vec, vec2scores = self.get_batch_model( + docs, cuda_stream, 0.0) nr_state = len(docs) nr_class = self.moves.n_moves nr_dim = tokvecs.shape[1] @@ -422,7 +389,8 @@ cdef class Parser: feat_weights = state2vec.get_feat_weights() cdef int i - cdef np.ndarray hidden_weights = numpy.ascontiguousarray(vec2scores._layers[-1].W.T) + cdef np.ndarray hidden_weights = numpy.ascontiguousarray( + vec2scores._layers[-1].W.T) cdef np.ndarray hidden_bias = vec2scores._layers[-1].b hW = hidden_weights.data @@ -450,6 +418,7 @@ cdef class Parser: with gil: PyErr_SetFromErrno(MemoryError) PyErr_CheckSignals() + cdef float feature while not state.is_final(): state.set_context_tokens(token_ids, nr_feat) memset(vectors, 0, nr_hidden * nr_piece * sizeof(float)) @@ -489,9 +458,9 @@ cdef class Parser: cdef Doc doc cdef int nr_class = self.moves.n_moves cdef StateClass stcls, output - cuda_stream = get_cuda_stream() - (tokvecs, bp_tokvecs), state2vec, vec2scores = self.get_batch_model(docs, cuda_stream, - 0.0) + cuda_stream = util.get_cuda_stream() + (tokvecs, bp_tokvecs), state2vec, vec2scores = self.get_batch_model( + docs, cuda_stream, 0.0) beams = [] cdef int offset = 0 cdef int j = 0 @@ -546,9 +515,7 @@ cdef class Parser: if isinstance(docs, Doc) and isinstance(golds, GoldParse): docs = [docs] golds = [golds] - - cuda_stream = get_cuda_stream() - + cuda_stream = util.get_cuda_stream() states, golds, max_steps = self._init_gold_batch(docs, golds) (tokvecs, bp_tokvecs), state2vec, vec2scores = self.get_batch_model(docs, cuda_stream, drop) @@ -563,7 +530,6 @@ cdef class Parser: n_steps = 0 while todo: states, golds = zip(*todo) - token_ids = self.get_token_ids(states) vector, bp_vector = state2vec.begin_update(token_ids, drop=0.0) if drop != 0: @@ -585,8 +551,8 @@ cdef class Parser: and not isinstance(token_ids, state2vec.ops.xp.ndarray): # Move token_ids and d_vector to GPU, asynchronously backprops.append(( - get_async(cuda_stream, token_ids), - get_async(cuda_stream, d_vector), + util.get_async(cuda_stream, token_ids), + util.get_async(cuda_stream, d_vector), bp_vector )) else: @@ -619,15 +585,13 @@ cdef class Parser: states = self.moves.init_batch(docs) for gold in golds: self.moves.preprocess_gold(gold) - - cuda_stream = get_cuda_stream() - (tokvecs, bp_tokvecs), state2vec, vec2scores = self.get_batch_model(docs, cuda_stream, drop) - - states_d_scores, backprops = _beam_utils.update_beam(self.moves, self.nr_feature, 500, - states, golds, - state2vec, vec2scores, - width, density, self.cfg.get('hist_size', 0), - drop=drop, losses=losses) + cuda_stream = util.get_cuda_stream() + (tokvecs, bp_tokvecs), state2vec, vec2scores = self.get_batch_model( + docs, cuda_stream, drop) + states_d_scores, backprops = _beam_utils.update_beam( + self.moves, self.nr_feature, 500, states, golds, state2vec, + vec2scores, width, density, 
self.cfg.get('hist_size', 0), + drop=drop, losses=losses) backprop_lower = [] cdef float batch_size = len(docs) for i, d_scores in enumerate(states_d_scores): @@ -639,13 +603,14 @@ cdef class Parser: if isinstance(self.model[0].ops, CupyOps) \ and not isinstance(ids, state2vec.ops.xp.ndarray): backprop_lower.append(( - get_async(cuda_stream, ids), - get_async(cuda_stream, d_vector), + util.get_async(cuda_stream, ids), + util.get_async(cuda_stream, d_vector), bp_vectors)) else: backprop_lower.append((ids, d_vector, bp_vectors)) d_tokvecs = self.model[0].ops.allocate(tokvecs.shape) - self._make_updates(d_tokvecs, bp_tokvecs, backprop_lower, sgd, cuda_stream) + self._make_updates(d_tokvecs, bp_tokvecs, backprop_lower, sgd, + cuda_stream) def _init_gold_batch(self, whole_docs, whole_golds): """Make a square batch, of length equal to the shortest doc. A long @@ -796,7 +761,8 @@ cdef class Parser: def begin_training(self, gold_tuples, pipeline=None, **cfg): if 'model' in cfg: self.model = cfg['model'] - gold_tuples = nonproj.preprocess_training_data(gold_tuples, label_freq_cutoff=100) + gold_tuples = nonproj.preprocess_training_data(gold_tuples, + label_freq_cutoff=100) actions = self.moves.get_actions(gold_parses=gold_tuples) for action, labels in actions.items(): for label in labels: diff --git a/spacy/syntax/nonproj.pyx b/spacy/syntax/nonproj.pyx index 499effcda..404f1bc90 100644 --- a/spacy/syntax/nonproj.pyx +++ b/spacy/syntax/nonproj.pyx @@ -1,39 +1,37 @@ # coding: utf-8 -""" -Implements the projectivize/deprojectivize mechanism in Nivre & Nilsson 2005 +"""Implements the projectivize/deprojectivize mechanism in Nivre & Nilsson 2005 for doing pseudo-projective parsing implementation uses the HEAD decoration scheme. """ from __future__ import unicode_literals + from copy import copy -from ..tokens.doc cimport Doc -from ..attrs import DEP, HEAD DELIMITER = '||' def ancestors(tokenid, heads): - # returns all words going from the word up the path to the root - # the path to root cannot be longer than the number of words in the sentence - # this function ends after at most len(heads) steps - # because it would otherwise loop indefinitely on cycles + # Returns all words going from the word up the path to the root. The path + # to root cannot be longer than the number of words in the sentence. This + # function ends after at most len(heads) steps, because it would otherwise + # loop indefinitely on cycles. head = tokenid cnt = 0 while heads[head] != head and cnt < len(heads): head = heads[head] cnt += 1 yield head - if head == None: + if head is None: break def contains_cycle(heads): - # in an acyclic tree, the path from each word following - # the head relation upwards always ends at the root node + # in an acyclic tree, the path from each word following the head relation + # upwards always ends at the root node for tokenid in range(len(heads)): seen = set([tokenid]) - for ancestor in ancestors(tokenid,heads): + for ancestor in ancestors(tokenid, heads): if ancestor in seen: return seen seen.add(ancestor) @@ -45,26 +43,26 @@ def is_nonproj_arc(tokenid, heads): # if there is a token k, h < k < d such that h is not # an ancestor of k. 
Same for h -> d, h > d head = heads[tokenid] - if head == tokenid: # root arcs cannot be non-projective + if head == tokenid: # root arcs cannot be non-projective return False - elif head == None: # unattached tokens cannot be non-projective + elif head is None: # unattached tokens cannot be non-projective return False start, end = (head+1, tokenid) if head < tokenid else (tokenid+1, head) - for k in range(start,end): - for ancestor in ancestors(k,heads): - if ancestor == None: # for unattached tokens/subtrees + for k in range(start, end): + for ancestor in ancestors(k, heads): + if ancestor is None: # for unattached tokens/subtrees break - elif ancestor == head: # normal case: k dominated by h + elif ancestor == head: # normal case: k dominated by h break - else: # head not in ancestors: d -> h is non-projective + else: # head not in ancestors: d -> h is non-projective return True return False def is_nonproj_tree(heads): # a tree is non-projective if at least one arc is non-projective - return any( is_nonproj_arc(word,heads) for word in range(len(heads)) ) + return any(is_nonproj_arc(word, heads) for word in range(len(heads))) def decompose(label): @@ -81,32 +79,32 @@ def preprocess_training_data(gold_tuples, label_freq_cutoff=30): for raw_text, sents in gold_tuples: prepro_sents = [] for (ids, words, tags, heads, labels, iob), ctnts in sents: - proj_heads,deco_labels = projectivize(heads,labels) + proj_heads, deco_labels = projectivize(heads, labels) # set the label to ROOT for each root dependent - deco_labels = [ 'ROOT' if head == i else deco_labels[i] for i,head in enumerate(proj_heads) ] + deco_labels = ['ROOT' if head == i else deco_labels[i] + for i, head in enumerate(proj_heads)] # count label frequencies if label_freq_cutoff > 0: for label in deco_labels: if is_decorated(label): - freqs[label] = freqs.get(label,0) + 1 - prepro_sents.append(((ids,words,tags,proj_heads,deco_labels,iob), ctnts)) + freqs[label] = freqs.get(label, 0) + 1 + prepro_sents.append( + ((ids, words, tags, proj_heads, deco_labels, iob), ctnts)) preprocessed.append((raw_text, prepro_sents)) - if label_freq_cutoff > 0: - return _filter_labels(preprocessed,label_freq_cutoff,freqs) + return _filter_labels(preprocessed, label_freq_cutoff, freqs) return preprocessed def projectivize(heads, labels): - # use the algorithm by Nivre & Nilsson 2005 - # assumes heads to be a proper tree, i.e. connected and cycle-free - # returns a new pair (heads,labels) which encode - # a projective and decorated tree + # Use the algorithm by Nivre & Nilsson 2005. Assumes heads to be a proper + # tree, i.e. connected and cycle-free. Returns a new pair (heads, labels) + # which encode a projective and decorated tree. proj_heads = copy(heads) smallest_np_arc = _get_smallest_nonproj_arc(proj_heads) - if smallest_np_arc == None: # this sentence is already projective + if smallest_np_arc is None: # this sentence is already projective return proj_heads, copy(labels) - while smallest_np_arc != None: + while smallest_np_arc is not None: _lift(smallest_np_arc, proj_heads) smallest_np_arc = _get_smallest_nonproj_arc(proj_heads) deco_labels = _decorate(heads, proj_heads, labels) @@ -114,24 +112,26 @@ def projectivize(heads, labels): def deprojectivize(tokens): - # reattach arcs with decorated labels (following HEAD scheme) - # for each decorated arc X||Y, search top-down, left-to-right, - # breadth-first until hitting a Y then make this the new head + # Reattach arcs with decorated labels (following HEAD scheme). 
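Because these helpers operate on plain head-index lists, they are easy to exercise directly. A small worked example on the classic crossing-arc sentence "A hearing is scheduled on the issue today" (the head indices and dependency labels below are illustrative annotations, and the import path assumes the compiled module is accessible as spacy.syntax.nonproj):

from spacy.syntax.nonproj import is_nonproj_arc, is_nonproj_tree, projectivize

#          0      1            2      3       4       5      6       7
# tokens:  A      hearing      is     scheduled       on     the     issue     today
heads  = [ 1,     3,           3,     3,      1,      6,     4,      3]
labels = ['det', 'nsubjpass', 'aux', 'ROOT', 'prep', 'det', 'pobj', 'npadvmod']

print(is_nonproj_arc(4, heads))   # True: hearing -> on crosses scheduled -> today
print(is_nonproj_tree(heads))     # True

proj_heads, deco_labels = projectivize(heads, labels)
print(proj_heads)       # expected [1, 3, 3, 3, 3, 6, 4, 3]: "on" lifted to "scheduled"
print(deco_labels[4])   # expected 'prep||nsubjpass': decorated with the original head's label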
For each + # decorated arc X||Y, search top-down, left-to-right, breadth-first until + # hitting a Y then make this the new head. for token in tokens: if is_decorated(token.dep_): - newlabel,headlabel = decompose(token.dep_) - newhead = _find_new_head(token,headlabel) + newlabel, headlabel = decompose(token.dep_) + newhead = _find_new_head(token, headlabel) token.head = newhead token.dep_ = newlabel return tokens + def _decorate(heads, proj_heads, labels): # uses decoration scheme HEAD from Nivre & Nilsson 2005 assert(len(heads) == len(proj_heads) == len(labels)) deco_labels = [] - for tokenid,head in enumerate(heads): + for tokenid, head in enumerate(heads): if head != proj_heads[tokenid]: - deco_labels.append('%s%s%s' % (labels[tokenid], DELIMITER, labels[head])) + deco_labels.append( + '%s%s%s' % (labels[tokenid], DELIMITER, labels[head])) else: deco_labels.append(labels[tokenid]) return deco_labels @@ -143,9 +143,9 @@ def _get_smallest_nonproj_arc(heads): # and ties are broken left to right smallest_size = float('inf') smallest_np_arc = None - for tokenid,head in enumerate(heads): + for tokenid, head in enumerate(heads): size = abs(tokenid-head) - if size < smallest_size and is_nonproj_arc(tokenid,heads): + if size < smallest_size and is_nonproj_arc(tokenid, heads): smallest_size = size smallest_np_arc = tokenid return smallest_np_arc @@ -168,8 +168,10 @@ def _find_new_head(token, headlabel): next_queue = [] for qtoken in queue: for child in qtoken.children: - if child.is_space: continue - if child == token: continue + if child.is_space: + continue + if child == token: + continue if child.dep_ == headlabel: return child next_queue.append(child) @@ -184,7 +186,10 @@ def _filter_labels(gold_tuples, cutoff, freqs): for raw_text, sents in gold_tuples: filtered_sents = [] for (ids, words, tags, heads, labels, iob), ctnts in sents: - filtered_labels = [ decompose(label)[0] if freqs.get(label,cutoff) < cutoff else label for label in labels ] - filtered_sents.append(((ids,words,tags,heads,filtered_labels,iob), ctnts)) + filtered_labels = [decompose(label)[0] + if freqs.get(label, cutoff) < cutoff + else label for label in labels] + filtered_sents.append( + ((ids, words, tags, heads, filtered_labels, iob), ctnts)) filtered.append((raw_text, filtered_sents)) return filtered diff --git a/spacy/syntax/stateclass.pyx b/spacy/syntax/stateclass.pyx index ddd1f558c..ea0ec77e5 100644 --- a/spacy/syntax/stateclass.pyx +++ b/spacy/syntax/stateclass.pyx @@ -2,17 +2,8 @@ # cython: infer_types=True from __future__ import unicode_literals -from libc.string cimport memcpy, memset -from libc.stdint cimport uint32_t, uint64_t import numpy -from ..vocab cimport EMPTY_LEXEME -from ..structs cimport Entity -from ..lexeme cimport Lexeme -from ..symbols cimport punct -from ..attrs cimport IS_SPACE -from ..attrs cimport attr_id_t -from ..tokens.token cimport Token from ..tokens.doc cimport Doc diff --git a/spacy/syntax/transition_system.pyx b/spacy/syntax/transition_system.pyx index 922fdf97c..c351636c4 100644 --- a/spacy/syntax/transition_system.pyx +++ b/spacy/syntax/transition_system.pyx @@ -2,17 +2,17 @@ # coding: utf-8 from __future__ import unicode_literals -from cpython.ref cimport PyObject, Py_INCREF, Py_XDECREF +from cpython.ref cimport Py_INCREF from cymem.cymem cimport Pool from thinc.typedefs cimport weight_t -from collections import defaultdict, OrderedDict +from collections import OrderedDict import ujson -from .. 
import util from ..structs cimport TokenC from .stateclass cimport StateClass -from ..attrs cimport TAG, HEAD, DEP, ENT_TYPE, ENT_IOB from ..typedefs cimport attr_t +from ..compat import json_dumps +from .. import util cdef weight_t MIN_SCORE = -90000 @@ -136,11 +136,12 @@ cdef class TransitionSystem: print([gold.c.ner[i].clas for i in range(gold.length)]) print([gold.c.ner[i].move for i in range(gold.length)]) print([gold.c.ner[i].label for i in range(gold.length)]) - print("Self labels", [self.c[i].label for i in range(self.n_moves)]) + print("Self labels", + [self.c[i].label for i in range(self.n_moves)]) raise ValueError( "Could not find a gold-standard action to supervise " - "the entity recognizer\n" - "The transition system has %d actions." % (self.n_moves)) + "the entity recognizer. The transition system has " + "%d actions." % (self.n_moves)) def get_class_name(self, int clas): act = self.c[clas] @@ -149,7 +150,7 @@ cdef class TransitionSystem: def add_action(self, int action, label_name): cdef attr_t label_id if not isinstance(label_name, int) and \ - not isinstance(label_name, long): + not isinstance(label_name, long): label_id = self.strings.add(label_name) else: label_id = label_name @@ -186,7 +187,7 @@ cdef class TransitionSystem: 'name': self.move_name(trans.move, trans.label) }) serializers = { - 'transitions': lambda: ujson.dumps(transitions), + 'transitions': lambda: json_dumps(transitions), 'strings': lambda: self.strings.to_bytes() } return util.to_bytes(serializers, exclude) diff --git a/spacy/tagger.pxd b/spacy/tagger.pxd deleted file mode 100644 index 6d2cef1f4..000000000 --- a/spacy/tagger.pxd +++ /dev/null @@ -1,17 +0,0 @@ -from thinc.linear.avgtron cimport AveragedPerceptron -from thinc.extra.eg cimport Example -from thinc.structs cimport ExampleC - -from .structs cimport TokenC -from .vocab cimport Vocab - - -cdef class TaggerModel(AveragedPerceptron): - cdef void set_featuresC(self, ExampleC* eg, const TokenC* tokens, int i) except * - - -cdef class Tagger: - cdef readonly Vocab vocab - cdef readonly TaggerModel model - cdef public dict freqs - cdef public object cfg diff --git a/spacy/tagger.pyx b/spacy/tagger.pyx deleted file mode 100644 index 0fadea15d..000000000 --- a/spacy/tagger.pyx +++ /dev/null @@ -1,253 +0,0 @@ -# coding: utf8 -from __future__ import unicode_literals - -from collections import defaultdict - -from cymem.cymem cimport Pool -from thinc.typedefs cimport atom_t -from thinc.extra.eg cimport Example -from thinc.structs cimport ExampleC -from thinc.linear.avgtron cimport AveragedPerceptron -from thinc.linalg cimport VecVec - -from .tokens.doc cimport Doc -from .attrs cimport TAG -from .gold cimport GoldParse -from .attrs cimport * - - -cpdef enum: - P2_orth - P2_cluster - P2_shape - P2_prefix - P2_suffix - P2_pos - P2_lemma - P2_flags - - P1_orth - P1_cluster - P1_shape - P1_prefix - P1_suffix - P1_pos - P1_lemma - P1_flags - - W_orth - W_cluster - W_shape - W_prefix - W_suffix - W_pos - W_lemma - W_flags - - N1_orth - N1_cluster - N1_shape - N1_prefix - N1_suffix - N1_pos - N1_lemma - N1_flags - - N2_orth - N2_cluster - N2_shape - N2_prefix - N2_suffix - N2_pos - N2_lemma - N2_flags - - N_CONTEXT_FIELDS - - -cdef class TaggerModel(AveragedPerceptron): - def update(self, Example eg): - self.time += 1 - guess = eg.guess - best = VecVec.arg_max_if_zero(eg.c.scores, eg.c.costs, eg.c.nr_class) - if guess != best: - for feat in eg.c.features[:eg.c.nr_feat]: - self.update_weight(feat.key, best, -feat.value) - self.update_weight(feat.key, guess, 
feat.value) - - cdef void set_featuresC(self, ExampleC* eg, const TokenC* tokens, int i) except *: - _fill_from_token(&eg.atoms[P2_orth], &tokens[i-2]) - _fill_from_token(&eg.atoms[P1_orth], &tokens[i-1]) - _fill_from_token(&eg.atoms[W_orth], &tokens[i]) - _fill_from_token(&eg.atoms[N1_orth], &tokens[i+1]) - _fill_from_token(&eg.atoms[N2_orth], &tokens[i+2]) - - eg.nr_feat = self.extracter.set_features(eg.features, eg.atoms) - - -cdef inline void _fill_from_token(atom_t* context, const TokenC* t) nogil: - context[0] = t.lex.lower - context[1] = t.lex.cluster - context[2] = t.lex.shape - context[3] = t.lex.prefix - context[4] = t.lex.suffix - context[5] = t.tag - context[6] = t.lemma - if t.lex.flags & (1 << IS_ALPHA): - context[7] = 1 - elif t.lex.flags & (1 << IS_PUNCT): - context[7] = 2 - elif t.lex.flags & (1 << LIKE_URL): - context[7] = 3 - elif t.lex.flags & (1 << LIKE_NUM): - context[7] = 4 - else: - context[7] = 0 - - -cdef class Tagger: - """Annotate part-of-speech tags on Doc objects.""" - - def __init__(self, Vocab vocab, TaggerModel model=None, **cfg): - """Create a Tagger. - - vocab (Vocab): The vocabulary object. Must be shared with documents to - be processed. - model (thinc.linear.AveragedPerceptron): The statistical model. - RETURNS (Tagger): The newly constructed object. - """ - if model is None: - model = TaggerModel(cfg.get('features', self.feature_templates), - L1=0.0) - self.vocab = vocab - self.model = model - self.model.l1_penalty = 0.0 - # TODO: Move this to tag map - self.freqs = {TAG: defaultdict(int)} - for tag in self.tag_names: - self.freqs[TAG][self.vocab.strings[tag]] = 1 - self.freqs[TAG][0] = 1 - self.cfg = cfg - - @property - def tag_names(self): - return self.vocab.morphology.tag_names - - def __reduce__(self): - return (self.__class__, (self.vocab, self.model), None, None) - - def tag_from_strings(self, Doc tokens, object tag_strs): - cdef int i - for i in range(tokens.length): - self.vocab.morphology.assign_tag(&tokens.c[i], tag_strs[i]) - tokens.is_tagged = True - tokens._py_tokens = [None] * tokens.length - - def __call__(self, Doc tokens): - """Apply the tagger, setting the POS tags onto the Doc object. - - doc (Doc): The tokens to be tagged. - """ - if tokens.length == 0: - return 0 - - cdef Pool mem = Pool() - - cdef int i, tag - cdef Example eg = Example(nr_atom=N_CONTEXT_FIELDS, - nr_class=self.vocab.morphology.n_tags, - nr_feat=self.model.nr_feat) - for i in range(tokens.length): - if tokens.c[i].pos == 0: - self.model.set_featuresC(&eg.c, tokens.c, i) - self.model.set_scoresC(eg.c.scores, - eg.c.features, eg.c.nr_feat) - guess = VecVec.arg_max_if_true(eg.c.scores, eg.c.is_valid, eg.c.nr_class) - self.vocab.morphology.assign_tag_id(&tokens.c[i], guess) - eg.fill_scores(0, eg.c.nr_class) - tokens.is_tagged = True - tokens._py_tokens = [None] * tokens.length - - def pipe(self, stream, batch_size=1000, n_threads=2): - """Tag a stream of documents. - - Arguments: - stream: The sequence of documents to tag. - batch_size (int): The number of documents to accumulate into a working set. - n_threads (int): The number of threads with which to work on the buffer - in parallel, if the Matcher implementation supports multi-threading. - YIELDS (Doc): Documents, in order. - """ - for doc in stream: - self(doc) - yield doc - - def update(self, Doc tokens, GoldParse gold, itn=0): - """Update the statistical model, with tags supplied for the given document. - - doc (Doc): The document to update on. - gold (GoldParse): Manager for the gold-standard tags. 
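The TaggerModel.update logic above delegates the actual weight changes to thinc's averaged perceptron. As a plain-Python reminder of the underlying technique (textbook form only; thinc's sign conventions and the weight averaging are not reproduced here):

from collections import defaultdict

weights = defaultdict(lambda: defaultdict(float))   # feature -> class -> weight

def perceptron_update(features, guess, gold):
    # Only update when the prediction was wrong: reward the gold class and
    # penalise the guessed class, for every active feature.
    if guess == gold:
        return
    for feat in features:
        weights[feat][gold] += 1.0
        weights[feat][guess] -= 1.0

def predict(features, classes):
    scores = {c: sum(weights[f][c] for f in features) for c in classes}
    return max(scores, key=scores.get)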
- RETURNS (int): Number of tags predicted correctly. - """ - gold_tag_strs = gold.tags - assert len(tokens) == len(gold_tag_strs) - for tag in gold_tag_strs: - if tag != None and tag not in self.tag_names: - msg = ("Unrecognized gold tag: %s. tag_map.json must contain all " - "gold tags, to maintain coarse-grained mapping.") - raise ValueError(msg % tag) - golds = [self.tag_names.index(g) if g is not None else -1 for g in gold_tag_strs] - cdef int correct = 0 - cdef Pool mem = Pool() - cdef Example eg = Example( - nr_atom=N_CONTEXT_FIELDS, - nr_class=self.vocab.morphology.n_tags, - nr_feat=self.model.nr_feat) - for i in range(tokens.length): - self.model.set_featuresC(&eg.c, tokens.c, i) - eg.costs = [ 1 if golds[i] not in (c, -1) else 0 for c in xrange(eg.nr_class) ] - self.model.set_scoresC(eg.c.scores, - eg.c.features, eg.c.nr_feat) - self.model.update(eg) - - self.vocab.morphology.assign_tag_id(&tokens.c[i], eg.guess) - - correct += eg.cost == 0 - self.freqs[TAG][tokens.c[i].tag] += 1 - eg.fill_scores(0, eg.c.nr_class) - eg.fill_costs(0, eg.c.nr_class) - tokens.is_tagged = True - tokens._py_tokens = [None] * tokens.length - return correct - - - feature_templates = ( - (W_orth,), - (P1_lemma, P1_pos), - (P2_lemma, P2_pos), - (N1_orth,), - (N2_orth,), - - (W_suffix,), - (W_prefix,), - - (P1_pos,), - (P2_pos,), - (P1_pos, P2_pos), - (P1_pos, W_orth), - (P1_suffix,), - (N1_suffix,), - - (W_shape,), - (W_cluster,), - (N1_cluster,), - (N2_cluster,), - (P1_cluster,), - (P2_cluster,), - - (W_flags,), - (N1_flags,), - (N2_flags,), - (P1_flags,), - (P2_flags,), - ) diff --git a/spacy/tests/parser/test_add_label.py b/spacy/tests/parser/test_add_label.py index c3bceb106..9493452a1 100644 --- a/spacy/tests/parser/test_add_label.py +++ b/spacy/tests/parser/test_add_label.py @@ -40,6 +40,8 @@ def parser(vocab): def test_init_parser(parser): pass +# TODO: This is flakey, because it depends on what the parser first learns. +@pytest.mark.xfail def test_add_label(parser): doc = Doc(parser.vocab, words=['a', 'b', 'c', 'd']) doc = parser(doc) diff --git a/spacy/tokenizer.pyx b/spacy/tokenizer.pyx index e865c60dd..ef31a5d5c 100644 --- a/spacy/tokenizer.pyx +++ b/spacy/tokenizer.pyx @@ -8,12 +8,11 @@ from cython.operator cimport preincrement as preinc from cymem.cymem cimport Pool from preshed.maps cimport PreshMap import regex as re - -from .strings cimport hash_string -from . import util cimport cython from .tokens.doc cimport Doc +from .strings cimport hash_string +from . import util cdef class Tokenizer: @@ -21,7 +20,7 @@ cdef class Tokenizer: boundaries. """ def __init__(self, Vocab vocab, rules=None, prefix_search=None, - suffix_search=None, infix_finditer=None, token_match=None): + suffix_search=None, infix_finditer=None, token_match=None): """Create a `Tokenizer`, to create `Doc` objects given unicode text. vocab (Vocab): A storage container for lexical types. @@ -74,9 +73,8 @@ cdef class Tokenizer: RETURNS (Doc): A container for linguistic annotations. """ if len(string) >= (2 ** 30): - raise ValueError( - "String is too long: %d characters. Max is 2**30." % len(string) - ) + msg = "String is too long: %d characters. Max is 2**30." + raise ValueError(msg % len(string)) cdef int length = len(string) cdef Doc doc = Doc(self.vocab) if length == 0: @@ -122,8 +120,8 @@ cdef class Tokenizer: """Tokenize a stream of texts. texts: A sequence of unicode texts. - batch_size (int): The number of texts to accumulate in an internal buffer. 
- n_threads (int): The number of threads to use, if the implementation + batch_size (int): Number of texts to accumulate in an internal buffer. + n_threads (int): Number of threads to use, if the implementation supports multi-threading. The default tokenizer is single-threaded. YIELDS (Doc): A sequence of Doc objects, in order. """ @@ -232,8 +230,8 @@ cdef class Tokenizer: if not matches: tokens.push_back(self.vocab.get(tokens.mem, string), False) else: - # let's say we have dyn-o-mite-dave - # the regex finds the start and end positions of the hyphens + # let's say we have dyn-o-mite-dave - the regex finds the + # start and end positions of the hyphens start = 0 for match in matches: infix_start = match.start() @@ -293,8 +291,8 @@ cdef class Tokenizer: return list(self.infix_finditer(string)) def find_prefix(self, unicode string): - """Find the length of a prefix that should be segmented from the string, - or None if no prefix rules match. + """Find the length of a prefix that should be segmented from the + string, or None if no prefix rules match. string (unicode): The string to segment. RETURNS (int): The length of the prefix if present, otherwise `None`. @@ -305,8 +303,8 @@ cdef class Tokenizer: return (match.end() - match.start()) if match is not None else 0 def find_suffix(self, unicode string): - """Find the length of a suffix that should be segmented from the string, - or None if no suffix rules match. + """Find the length of a suffix that should be segmented from the + string, or None if no suffix rules match. string (unicode): The string to segment. Returns (int): The length of the suffix if present, otherwise `None`. @@ -326,8 +324,8 @@ cdef class Tokenizer: string (unicode): The string to specially tokenize. token_attrs (iterable): A sequence of dicts, where each dict describes - a token and its attributes. The `ORTH` fields of the attributes must - exactly match the string when they are concatenated. + a token and its attributes. The `ORTH` fields of the attributes + must exactly match the string when they are concatenated. """ substrings = list(substrings) cached = <_Cached*>self.mem.alloc(1, sizeof(_Cached)) @@ -343,7 +341,7 @@ cdef class Tokenizer: """Save the current state to a directory. path (unicode or Path): A path to a directory, which will be created if - it doesn't exist. Paths may be either strings or `Path`-like objects. + it doesn't exist. Paths may be either strings or Path-like objects. 
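A minimal usage sketch for add_special_case(), assuming `tokenizer` is an existing Tokenizer instance (e.g. nlp.tokenizer); note that the ORTH values must concatenate back to exactly the original string, as the docstring above requires:

from spacy.attrs import ORTH

tokenizer.add_special_case(u"don't", [{ORTH: u"do"}, {ORTH: u"n't"}])
doc = tokenizer(u"I don't care")
print([t.text for t in doc])   # ['I', 'do', "n't", 'care']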
""" with path.open('wb') as file_: file_.write(self.to_bytes(**exclude)) diff --git a/spacy/tokens/__init__.py b/spacy/tokens/__init__.py index bc3794126..b4815abd2 100644 --- a/spacy/tokens/__init__.py +++ b/spacy/tokens/__init__.py @@ -2,4 +2,4 @@ from .doc import Doc from .token import Token from .span import Span -__all__ = [Doc, Token, Span] +__all__ = ['Doc', 'Token', 'Span'] diff --git a/spacy/tokens/binder.pyx b/spacy/tokens/binder.pyx deleted file mode 100644 index 0ee168579..000000000 --- a/spacy/tokens/binder.pyx +++ /dev/null @@ -1,21 +0,0 @@ -cdef class Binder: - def __init__(self, *docs): - pass - - def __iter__(self): - pass - - def __reduce__(self): - pass - - def to_bytes(self): - pass - - def from_bytes(cls, data): - pass - - def to_disk(self): - pass - - def from_disk(self, path): - pass diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index 1bd61b256..7a2e95e4b 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -23,9 +23,9 @@ from ..lexeme cimport Lexeme, EMPTY_LEXEME from ..typedefs cimport attr_t, flags_t from ..attrs import intify_attrs, IDS from ..attrs cimport attr_id_t -from ..attrs cimport ID, ORTH, NORM, LOWER, SHAPE, PREFIX, SUFFIX, LENGTH, CLUSTER -from ..attrs cimport LENGTH, POS, LEMMA, TAG, DEP, HEAD, SPACY, ENT_IOB, ENT_TYPE -from ..attrs cimport SENT_START +from ..attrs cimport ID, ORTH, NORM, LOWER, SHAPE, PREFIX, SUFFIX, CLUSTER +from ..attrs cimport LENGTH, POS, LEMMA, TAG, DEP, HEAD, SPACY, ENT_IOB +from ..attrs cimport ENT_TYPE, SENT_START from ..parts_of_speech cimport CCONJ, PUNCT, NOUN, univ_pos_t from ..util import normalize_slice from ..compat import is_config, copy_reg, pickle @@ -78,24 +78,25 @@ def _get_chunker(lang): cdef class Doc: """A sequence of Token objects. Access sentences and named entities, export - annotations to numpy arrays, losslessly serialize to compressed binary strings. - The `Doc` object holds an array of `TokenC` structs. The Python-level - `Token` and `Span` objects are views of this array, i.e. they don't own - the data themselves. + annotations to numpy arrays, losslessly serialize to compressed binary + strings. The `Doc` object holds an array of `TokenC` structs. The + Python-level `Token` and `Span` objects are views of this array, i.e. + they don't own the data themselves. EXAMPLE: Construction 1 >>> doc = nlp(u'Some text') Construction 2 >>> from spacy.tokens import Doc - >>> doc = Doc(nlp.vocab, words=[u'hello', u'world', u'!'], spaces=[True, False, False]) + >>> doc = Doc(nlp.vocab, words=[u'hello', u'world', u'!'], + spaces=[True, False, False]) """ @classmethod def set_extension(cls, name, default=None, method=None, getter=None, setter=None): nr_defined = sum(t is not None for t in (default, getter, setter, method)) assert nr_defined == 1 - Underscore.doc_extensions[name] = (default, method, getter, setter) + Underscore.doc_extensions[name] = (default, method, getter, setter) @classmethod def get_extension(cls, name): @@ -109,15 +110,14 @@ cdef class Doc: orths_and_spaces=None): """Create a Doc object. - vocab (Vocab): A vocabulary object, which must match any models you want - to use (e.g. tokenizer, parser, entity recognizer). + vocab (Vocab): A vocabulary object, which must match any models you + want to use (e.g. tokenizer, parser, entity recognizer). words (list or None): A list of unicode strings to add to the document as words. If `None`, defaults to empty list. spaces (list or None): A list of boolean values, of the same length as words. 
True means that the word is followed by a space, False means it is not. If `None`, defaults to `[True]*len(words)` user_data (dict or None): Optional extra data to attach to the Doc. - RETURNS (Doc): The newly constructed object. """ self.vocab = vocab @@ -153,10 +153,10 @@ cdef class Doc: spaces = [True] * len(words) elif len(spaces) != len(words): raise ValueError( - "Arguments 'words' and 'spaces' should be sequences of the " - "same length, or 'spaces' should be left default at None. " - "spaces should be a sequence of booleans, with True meaning " - "that the word owns a ' ' character following it.") + "Arguments 'words' and 'spaces' should be sequences of " + "the same length, or 'spaces' should be left default at " + "None. spaces should be a sequence of booleans, with True " + "meaning that the word owns a ' ' character following it.") orths_and_spaces = zip(words, spaces) if orths_and_spaces is not None: for orth_space in orths_and_spaces: @@ -166,7 +166,8 @@ cdef class Doc: elif isinstance(orth_space, bytes): raise ValueError( "orths_and_spaces expects either List(unicode) or " - "List((unicode, bool)). Got bytes instance: %s" % (str(orth_space))) + "List((unicode, bool)). " + "Got bytes instance: %s" % (str(orth_space))) else: orth, has_space = orth_space # Note that we pass self.mem here --- we have ownership, if LexemeC @@ -186,7 +187,8 @@ cdef class Doc: def __getitem__(self, object i): """Get a `Token` or `Span` object. - i (int or tuple) The index of the token, or the slice of the document to get. + i (int or tuple) The index of the token, or the slice of the document + to get. RETURNS (Token or Span): The token at `doc[i]]`, or the span at `doc[start : end]`. @@ -199,11 +201,11 @@ cdef class Doc: >>> doc[start : end]] Get a `Span` object, starting at position `start` and ending at position `end`, where `start` and `end` are token indices. For - instance, `doc[2:5]` produces a span consisting of tokens 2, 3 and 4. - Stepped slices (e.g. `doc[start : end : step]`) are not supported, - as `Span` objects must be contiguous (cannot have gaps). You can use - negative indices and open-ended ranges, which have their normal - Python semantics. + instance, `doc[2:5]` produces a span consisting of tokens 2, 3 and + 4. Stepped slices (e.g. `doc[start : end : step]`) are not + supported, as `Span` objects must be contiguous (cannot have gaps). + You can use negative indices and open-ended ranges, which have + their normal Python semantics. """ if isinstance(i, slice): start, stop = normalize_slice(len(self), i.start, i.stop, i.step) @@ -262,8 +264,10 @@ cdef class Doc: doc (Doc): The parent document. start (int): The index of the first character of the span. end (int): The index of the first character after the span. - label (uint64 or string): A label to attach to the Span, e.g. for named entities. - vector (ndarray[ndim=1, dtype='float32']): A meaning representation of the span. + label (uint64 or string): A label to attach to the Span, e.g. for + named entities. + vector (ndarray[ndim=1, dtype='float32']): A meaning representation of + the span. RETURNS (Span): The newly constructed object. 
""" if not isinstance(label, int): @@ -322,7 +326,8 @@ cdef class Doc: if self._vector is not None: return self._vector elif not len(self): - self._vector = numpy.zeros((self.vocab.vectors_length,), dtype='f') + self._vector = numpy.zeros((self.vocab.vectors_length,), + dtype='f') return self._vector elif self.has_vector: vector = numpy.zeros((self.vocab.vectors_length,), dtype='f') @@ -334,7 +339,8 @@ cdef class Doc: self._vector = self.tensor.mean(axis=0) return self._vector else: - return numpy.zeros((self.vocab.vectors_length,), dtype='float32') + return numpy.zeros((self.vocab.vectors_length,), + dtype='float32') def __set__(self, value): self._vector = value @@ -377,13 +383,14 @@ cdef class Doc: return self.text property ents: - """Iterate over the entities in the document. Yields named-entity `Span` - objects, if the entity recognizer has been applied to the document. + """Iterate over the entities in the document. Yields named-entity + `Span` objects, if the entity recognizer has been applied to the + document. YIELDS (Span): Entities in the document. - EXAMPLE: Iterate over the span to get individual Token objects, or access - the label: + EXAMPLE: Iterate over the span to get individual Token objects, + or access the label: >>> tokens = nlp(u'Mr. Best flew to New York on Saturday morning.') >>> ents = list(tokens.ents) @@ -419,7 +426,8 @@ cdef class Doc: def __set__(self, ents): # TODO: # 1. Allow negative matches - # 2. Ensure pre-set NERs are not over-written during statistical prediction + # 2. Ensure pre-set NERs are not over-written during statistical + # prediction # 3. Test basic data-driven ORTH gazetteer # 4. Test more nuanced date and currency regex cdef int i @@ -428,7 +436,7 @@ cdef class Doc: # At this point we don't know whether the NER has run over the # Doc. If the ent_iob is missing, leave it missing. if self.c[i].ent_iob != 0: - self.c[i].ent_iob = 2 # Means O. Non-O are set from ents. + self.c[i].ent_iob = 2 # Means O. Non-O are set from ents. cdef attr_t ent_type cdef int start, end for ent_info in ents: @@ -456,10 +464,11 @@ cdef class Doc: property noun_chunks: """Iterate over the base noun phrases in the document. Yields base - noun-phrase #[code Span] objects, if the document has been syntactically - parsed. A base noun phrase, or "NP chunk", is a noun phrase that does - not permit other NPs to be nested within it – so no NP-level - coordination, no prepositional phrases, and no relative clauses. + noun-phrase #[code Span] objects, if the document has been + syntactically parsed. A base noun phrase, or "NP chunk", is a noun + phrase that does not permit other NPs to be nested within it – so no + NP-level coordination, no prepositional phrases, and no relative + clauses. YIELDS (Span): Noun chunks in the document. """ @@ -467,12 +476,14 @@ cdef class Doc: if not self.is_parsed: raise ValueError( "noun_chunks requires the dependency parse, which " - "requires data to be installed. For more info, see the " + "requires a statistical model to be installed and loaded. " + "For more info, see the " "documentation: \n%s\n" % about.__docs_models__) - # Accumulate the result before beginning to iterate over it. This prevents - # the tokenisation from being changed out from under us during the iteration. - # The tricky thing here is that Span accepts its tokenisation changing, - # so it's okay once we have the Span objects. See Issue #375 + # Accumulate the result before beginning to iterate over it. 
This + # prevents the tokenisation from being changed out from under us + # during the iteration. The tricky thing here is that Span accepts + # its tokenisation changing, so it's okay once we have the Span + # objects. See Issue #375. spans = [] for start, end, label in self.noun_chunks_iterator(self): spans.append(Span(self, start, end, label=label)) @@ -497,8 +508,9 @@ cdef class Doc: if not self.is_parsed: raise ValueError( - "sentence boundary detection requires the dependency parse, which " - "requires data to be installed. For more info, see the " + "Sentence boundary detection requires the dependency " + "parse, which requires a statistical model to be " + "installed and loaded. For more info, see the " "documentation: \n%s\n" % about.__docs_models__) cdef int i start = 0 @@ -537,12 +549,11 @@ cdef class Doc: @cython.boundscheck(False) cpdef np.ndarray to_array(self, object py_attr_ids): """Export given token attributes to a numpy `ndarray`. - - If `attr_ids` is a sequence of M attributes, the output array will - be of shape `(N, M)`, where N is the length of the `Doc` - (in tokens). If `attr_ids` is a single attribute, the output shape will - be (N,). You can specify attributes by integer ID (e.g. spacy.attrs.LEMMA) - or string name (e.g. 'LEMMA' or 'lemma'). + If `attr_ids` is a sequence of M attributes, the output array will be + of shape `(N, M)`, where N is the length of the `Doc` (in tokens). If + `attr_ids` is a single attribute, the output shape will be (N,). You + can specify attributes by integer ID (e.g. spacy.attrs.LEMMA) or + string name (e.g. 'LEMMA' or 'lemma'). attr_ids (list[]): A list of attributes (int IDs or string names). RETURNS (numpy.ndarray[long, ndim=2]): A feature matrix, with one row @@ -566,18 +577,19 @@ cdef class Doc: # Allow strings, e.g. 'lemma' or 'LEMMA' py_attr_ids = [(IDS[id_.upper()] if hasattr(id_, 'upper') else id_) for id_ in py_attr_ids] - # Make an array from the attributes --- otherwise our inner loop is Python - # dict iteration. + # Make an array from the attributes --- otherwise our inner loop is + # Python dict iteration. attr_ids = numpy.asarray(py_attr_ids, dtype=numpy.uint64) - output = numpy.ndarray(shape=(self.length, len(attr_ids)), dtype=numpy.uint64) + output = numpy.ndarray(shape=(self.length, len(attr_ids)), + dtype=numpy.uint64) for i in range(self.length): for j, feature in enumerate(attr_ids): output[i, j] = get_token_attr(&self.c[i], feature) # Handle 1d case return output if len(attr_ids) >= 2 else output.reshape((self.length,)) - - def count_by(self, attr_id_t attr_id, exclude=None, PreshCounter counts=None): + def count_by(self, attr_id_t attr_id, exclude=None, + PreshCounter counts=None): """Count the frequencies of a given attribute. Produces a dict of `{attribute (int): count (ints)}` frequencies, keyed by the values of the given attribute ID. @@ -641,13 +653,12 @@ cdef class Doc: def from_array(self, attrs, array): if SENT_START in attrs and HEAD in attrs: raise ValueError( - "Conflicting attributes specified in doc.from_array():\n" + "Conflicting attributes specified in doc.from_array(): " "(HEAD, SENT_START)\n" - "The HEAD attribute currently sets sentence boundaries implicitly,\n" - "based on the tree structure. 
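A short usage sketch for to_array() and count_by() as documented above, assuming `doc` is an already-processed Doc:

from spacy.attrs import LOWER, POS, ENT_TYPE, ORTH

arr = doc.to_array([LOWER, POS, ENT_TYPE])   # uint64 matrix of shape (len(doc), 3)
lowers = doc.to_array([LOWER])               # single attribute -> shape (len(doc),)

counts = doc.count_by(ORTH)                  # {orth_id: frequency}
for orth_id, freq in counts.items():
    print(doc.vocab.strings[orth_id], freq)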
This means the HEAD attribute would " - "potentially override the sentence boundaries set by SENT_START.\n" - "See https://github.com/spacy-io/spaCy/issues/235 for details and " - "workarounds, and to propose solutions.") + "The HEAD attribute currently sets sentence boundaries " + "implicitly, based on the tree structure. This means the HEAD " + "attribute would potentially override the sentence boundaries " + "set by SENT_START.") cdef int i, col cdef attr_id_t attr_id cdef TokenC* tokens = self.c @@ -675,18 +686,14 @@ cdef class Doc: return self def get_lca_matrix(self): - ''' - Calculates the lowest common ancestor matrix - for a given Spacy doc. - Returns LCA matrix containing the integer index - of the ancestor, or -1 if no common ancestor is - found (ex if span excludes a necessary ancestor). - Apologies about the recursion, but the - impact on performance is negligible given - the natural limitations on the depth of a typical human sentence. - ''' + """Calculates the lowest common ancestor matrix for a given `Doc`. + Returns LCA matrix containing the integer index of the ancestor, or -1 + if no common ancestor is found (ex if span excludes a necessary + ancestor). Apologies about the recursion, but the impact on + performance is negligible given the natural limitations on the depth + of a typical human sentence. + """ # Efficiency notes: - # # We can easily improve the performance here by iterating in Cython. # To loop over the tokens in Cython, the easiest way is: # for token in doc.c[:doc.c.length]: @@ -705,7 +712,8 @@ cdef class Doc: elif (token_j.head == token_j) and (token_k.head == token_k): lca_index = -1 else: - lca_index = __pairwise_lca(token_j.head, token_k.head, lca_matrix) + lca_index = __pairwise_lca(token_j.head, token_k.head, + lca_matrix) lca_matrix[token_j.i][token_k.i] = lca_index lca_matrix[token_k.i][token_j.i] = lca_index @@ -719,14 +727,13 @@ cdef class Doc: token_k = self[k] lca_matrix[j][k] = __pairwise_lca(token_j, token_k, lca_matrix) lca_matrix[k][j] = lca_matrix[j][k] - return lca_matrix def to_disk(self, path, **exclude): """Save the current state to a directory. path (unicode or Path): A path to a directory, which will be created if - it doesn't exist. Paths may be either strings or `Path`-like objects. + it doesn't exist. Paths may be either strings or Path-like objects. """ with path.open('wb') as file_: file_.write(self.to_bytes(**exclude)) @@ -749,7 +756,7 @@ cdef class Doc: RETURNS (bytes): A losslessly serialized copy of the `Doc`, including all annotations. """ - array_head = [LENGTH,SPACY,TAG,LEMMA,HEAD,DEP,ENT_IOB,ENT_TYPE] + array_head = [LENGTH, SPACY, TAG, LEMMA, HEAD, DEP, ENT_IOB, ENT_TYPE] # Msgpack doesn't distinguish between lists and tuples, which is # vexing for user data. As a best guess, we *know* that within # keys, we must have tuples. In values we just have to hope @@ -792,7 +799,8 @@ cdef class Doc: # keys, we must have tuples. In values we just have to hope # users don't mind getting a list instead of a tuple. 
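A minimal round-trip sketch for the byte serialization documented above: the bytes can be loaded back into a fresh Doc that shares the same Vocab (assuming `doc` already exists):

from spacy.tokens import Doc

data = doc.to_bytes()
new_doc = Doc(doc.vocab).from_bytes(data)
assert [t.text for t in new_doc] == [t.text for t in doc]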
if 'user_data' not in exclude and 'user_data_keys' in msg: - user_data_keys = msgpack.loads(msg['user_data_keys'], use_list=False) + user_data_keys = msgpack.loads(msg['user_data_keys'], + use_list=False) user_data_values = msgpack.loads(msg['user_data_values']) for key, value in zip(user_data_keys, user_data_values): self.user_data[key] = value @@ -819,14 +827,15 @@ cdef class Doc: return self def merge(self, int start_idx, int end_idx, *args, **attributes): - """Retokenize the document, such that the span at `doc.text[start_idx : end_idx]` - is merged into a single token. If `start_idx` and `end_idx `do not mark - start and end token boundaries, the document remains unchanged. + """Retokenize the document, such that the span at + `doc.text[start_idx : end_idx]` is merged into a single token. If + `start_idx` and `end_idx `do not mark start and end token boundaries, + the document remains unchanged. - start_idx (int): The character index of the start of the slice to merge. - end_idx (int): The character index after the end of the slice to merge. + start_idx (int): Character index of the start of the slice to merge. + end_idx (int): Character index after the end of the slice to merge. **attributes: Attributes to assign to the merged token. By default, - attributes are inherited from the syntactic root token of the span. + attributes are inherited from the syntactic root of the span. RETURNS (Token): The newly merged token, or `None` if the start and end indices did not fall at token boundaries. """ @@ -847,10 +856,11 @@ cdef class Doc: attributes[ENT_TYPE] = attributes['ent_type'] elif args: raise ValueError( - "Doc.merge received %d non-keyword arguments. " - "Expected either 3 arguments (deprecated), or 0 (use keyword arguments). " + "Doc.merge received %d non-keyword arguments. Expected either " + "3 arguments (deprecated), or 0 (use keyword arguments). " "Arguments supplied:\n%s\n" - "Keyword arguments:%s\n" % (len(args), repr(args), repr(attributes))) + "Keyword arguments: %s\n" % (len(args), repr(args), + repr(attributes))) # More deprecated attribute handling =/ if 'label' in attributes: @@ -882,8 +892,9 @@ cdef class Doc: Token.set_struct_attr(token, attr_name, attr_value) # Begin by setting all the head indices to absolute token positions # This is easier to work with for now than the offsets - # Before thinking of something simpler, beware the case where a dependency - # bridges over the entity. Here the alignment of the tokens changes. + # Before thinking of something simpler, beware the case where a + # dependency bridges over the entity. Here the alignment of the + # tokens changes. 
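A usage sketch for merge(), assuming a loaded `nlp` pipeline; the character offsets come from the Span, and the keyword attributes override what would otherwise be inherited from the span's syntactic root:

doc = nlp(u'I flew to New York City.')
span = doc[3:6]                                # "New York City"
doc.merge(span.start_char, span.end_char,
          tag=u'NNP', lemma=u'New York City', ent_type=u'GPE')
print([t.text for t in doc])                   # ['I', 'flew', 'to', 'New York City', '.']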
span_root = span.root.i token.dep = span.root.dep # We update token.lex after keeping span root and dep, since @@ -932,8 +943,9 @@ cdef class Doc: >>> trees = doc.print_tree() >>> trees[1] {'modifiers': [ - {'modifiers': [], 'NE': 'PERSON', 'word': 'Alice', 'arc': 'nsubj', - 'POS_coarse': 'PROPN', 'POS_fine': 'NNP', 'lemma': 'Alice'}, + {'modifiers': [], 'NE': 'PERSON', 'word': 'Alice', + 'arc': 'nsubj', 'POS_coarse': 'PROPN', 'POS_fine': 'NNP', + 'lemma': 'Alice'}, {'modifiers': [ {'modifiers': [], 'NE': '', 'word': 'the', 'arc': 'det', 'POS_coarse': 'DET', 'POS_fine': 'DT', 'lemma': 'the'}], @@ -1008,7 +1020,7 @@ def pickle_doc(doc): def unpickle_doc(vocab, hooks_and_data, bytes_data): user_data, doc_hooks, span_hooks, token_hooks = dill.loads(hooks_and_data) - + doc = Doc(vocab, user_data=user_data).from_bytes(bytes_data, exclude='user_data') doc.user_hooks.update(doc_hooks) @@ -1018,4 +1030,3 @@ def unpickle_doc(vocab, hooks_and_data, bytes_data): copy_reg.pickle(Doc, pickle_doc, unpickle_doc) - diff --git a/spacy/tokens/printers.py b/spacy/tokens/printers.py index 4bc7099d7..92b2cd84c 100644 --- a/spacy/tokens/printers.py +++ b/spacy/tokens/printers.py @@ -43,8 +43,8 @@ def POS_tree(root, light=False, flat=False): def parse_tree(doc, light=False, flat=False): - """Makes a copy of the doc, then construct a syntactic parse tree, similar to - the one used in displaCy. Generates the POS tree for all sentences in a doc. + """Make a copy of the doc and construct a syntactic parse tree similar to + displaCy. Generates the POS tree for all sentences in a doc. doc (Doc): The doc for parsing. RETURNS (dict): The parse tree. @@ -66,8 +66,9 @@ def parse_tree(doc, light=False, flat=False): 'NE': '', 'word': 'ate', 'arc': 'ROOT', 'POS_coarse': 'VERB', 'POS_fine': 'VBD', 'lemma': 'eat'} """ - doc_clone = Doc(doc.vocab, words=[w.text for w in doc]) + doc_clone = Doc(doc.vocab, words=[w.text for w in doc]) doc_clone.from_array([HEAD, TAG, DEP, ENT_IOB, ENT_TYPE], doc.to_array([HEAD, TAG, DEP, ENT_IOB, ENT_TYPE])) merge_ents(doc_clone) # merge the entities into single tokens first - return [POS_tree(sent.root, light=light, flat=flat) for sent in doc_clone.sents] + return [POS_tree(sent.root, light=light, flat=flat) + for sent in doc_clone.sents] diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx index 963292fdb..efe511089 100644 --- a/spacy/tokens/span.pyx +++ b/spacy/tokens/span.pyx @@ -35,15 +35,16 @@ cdef class Span: def has_extension(cls, name): return name in Underscore.span_extensions - def __cinit__(self, Doc doc, int start, int end, attr_t label=0, vector=None, - vector_norm=None): + def __cinit__(self, Doc doc, int start, int end, attr_t label=0, + vector=None, vector_norm=None): """Create a `Span` object from the slice `doc[start : end]`. doc (Doc): The parent document. start (int): The index of the first token of the span. end (int): The index of the first token after the span. label (uint64): A label to attach to the Span, e.g. for named entities. - vector (ndarray[ndim=1, dtype='float32']): A meaning representation of the span. + vector (ndarray[ndim=1, dtype='float32']): A meaning representation + of the span. RETURNS (Span): The newly constructed object. """ if not (0 <= start <= end <= len(doc)): @@ -127,14 +128,17 @@ cdef class Span: @property def _(self): + """User space for adding custom attribute extensions.""" return Underscore(Underscore.span_extensions, self, start=self.start_char, end=self.end_char) def as_doc(self): - '''Create a Doc object view of the Span's data. 
+ # TODO: fix + """Create a `Doc` object view of the Span's data. This is mostly + useful for C-typed interfaces. - This is mostly useful for C-typed interfaces. - ''' + RETURNS (Doc): The `Doc` view of the span. + """ cdef Doc doc = Doc(self.doc.vocab) doc.length = self.end-self.start doc.c = &self.doc.c[self.start] @@ -162,7 +166,8 @@ cdef class Span: attributes are inherited from the syntactic root token of the span. RETURNS (Token): The newly merged token. """ - return self.doc.merge(self.start_char, self.end_char, *args, **attributes) + return self.doc.merge(self.start_char, self.end_char, *args, + **attributes) def similarity(self, other): """Make a semantic similarity estimate. The default estimate is cosine @@ -179,24 +184,19 @@ cdef class Span: return numpy.dot(self.vector, other.vector) / (self.vector_norm * other.vector_norm) def get_lca_matrix(self): - ''' - Calculates the lowest common ancestor matrix - for a given Spacy span. - Returns LCA matrix containing the integer index - of the ancestor, or -1 if no common ancestor is - found (ex if span excludes a necessary ancestor). - Apologies about the recursion, but the - impact on performance is negligible given - the natural limitations on the depth of a typical human sentence. - ''' - + """Calculates the lowest common ancestor matrix for a given `Span`. + Returns LCA matrix containing the integer index of the ancestor, or -1 + if no common ancestor is found (ex if span excludes a necessary + ancestor). Apologies about the recursion, but the impact on + performance is negligible given the natural limitations on the depth + of a typical human sentence. + """ def __pairwise_lca(token_j, token_k, lca_matrix, margins): offset = margins[0] token_k_head = token_k.head if token_k.head.i in range(*margins) else token_k token_j_head = token_j.head if token_j.head.i in range(*margins) else token_j token_j_i = token_j.i - offset token_k_i = token_k.i - offset - if lca_matrix[token_j_i][token_k_i] != -2: return lca_matrix[token_j_i][token_k_i] elif token_j == token_k: @@ -209,23 +209,19 @@ cdef class Span: lca_index = -1 else: lca_index = __pairwise_lca(token_j_head, token_k_head, lca_matrix, margins) - lca_matrix[token_j_i][token_k_i] = lca_index lca_matrix[token_k_i][token_j_i] = lca_index - return lca_index lca_matrix = numpy.empty((len(self), len(self)), dtype=numpy.int32) lca_matrix.fill(-2) margins = [self.start, self.end] - for j in range(len(self)): token_j = self[j] for k in range(len(self)): token_k = self[k] lca_matrix[j][k] = __pairwise_lca(token_j, token_k, lca_matrix, margins) lca_matrix[k][j] = lca_matrix[j][k] - return lca_matrix cpdef np.ndarray to_array(self, object py_attr_ids): @@ -266,10 +262,7 @@ cdef class Span: self.end = end + 1 property sent: - """The sentence span that this span is a part of. - - RETURNS (Span): The sentence span that the span is a part of. - """ + """RETURNS (Span): The sentence span that the span is a part of.""" def __get__(self): if 'sent' in self.doc.user_span_hooks: return self.doc.user_span_hooks['sent'](self) @@ -282,13 +275,10 @@ cdef class Span: n += 1 if n >= self.doc.length: raise RuntimeError - return self.doc[root.l_edge : root.r_edge + 1] + return self.doc[root.l_edge:root.r_edge + 1] property has_vector: - """A boolean value indicating whether a word vector is associated with - the object. - - RETURNS (bool): Whether a word vector is associated with the object. + """RETURNS (bool): Whether a word vector is associated with the object. 
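        EXAMPLE (an illustrative sketch; assumes a pipeline with word vectors
            loaded, e.g. the hypothetical `nlp = spacy.load('en_core_web_md')`):
            >>> doc = nlp(u'I like apples')
            >>> assert doc[2:3].has_vector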
""" def __get__(self): if 'has_vector' in self.doc.user_span_hooks: @@ -310,10 +300,7 @@ cdef class Span: return self._vector property vector_norm: - """The L2 norm of the document's vector representation. - - RETURNS (float): The L2 norm of the vector representation. - """ + """RETURNS (float): The L2 norm of the vector representation.""" def __get__(self): if 'vector_norm' in self.doc.user_span_hooks: return self.doc.user_span_hooks['vector'](self) @@ -327,7 +314,9 @@ cdef class Span: return self._vector_norm property sentiment: - # TODO: docstring + """RETURNS (float): A scalar value indicating the positivity or + negativity of the span. + """ def __get__(self): if 'sentiment' in self.doc.user_span_hooks: return self.doc.user_span_hooks['sentiment'](self) @@ -335,10 +324,7 @@ cdef class Span: return sum([token.sentiment for token in self]) / len(self) property text: - """A unicode representation of the span text. - - RETURNS (unicode): The original verbatim text of the span. - """ + """RETURNS (unicode): The original verbatim text of the span.""" def __get__(self): text = self.text_with_ws if self[-1].whitespace_: @@ -349,7 +335,8 @@ cdef class Span: """The text content of the span with a trailing whitespace character if the last token has one. - RETURNS (unicode): The text content of the span (with trailing whitespace). + RETURNS (unicode): The text content of the span (with trailing + whitespace). """ def __get__(self): return u''.join([t.text_with_ws for t in self]) @@ -358,7 +345,8 @@ cdef class Span: """Yields base noun-phrase `Span` objects, if the document has been syntactically parsed. A base noun phrase, or "NP chunk", is a noun phrase that does not permit other NPs to be nested within it – so no - NP-level coordination, no prepositional phrases, and no relative clauses. + NP-level coordination, no prepositional phrases, and no relative + clauses. YIELDS (Span): Base noun-phrase `Span` objects """ @@ -366,12 +354,14 @@ cdef class Span: if not self.doc.is_parsed: raise ValueError( "noun_chunks requires the dependency parse, which " - "requires data to be installed. For more info, see the " + "requires a statistical model to be installed and loaded. " + "For more info, see the " "documentation: \n%s\n" % about.__docs_models__) - # Accumulate the result before beginning to iterate over it. This prevents - # the tokenisation from being changed out from under us during the iteration. - # The tricky thing here is that Span accepts its tokenisation changing, - # so it's okay once we have the Span objects. See Issue #375 + # Accumulate the result before beginning to iterate over it. This + # prevents the tokenisation from being changed out from under us + # during the iteration. The tricky thing here is that Span accepts + # its tokenisation changing, so it's okay once we have the Span + # objects. See Issue #375 spans = [] cdef attr_t label for start, end, label in self.doc.noun_chunks_iterator(self): @@ -385,9 +375,9 @@ cdef class Span: RETURNS (Token): The root token. - EXAMPLE: The root token has the shortest path to the root of the sentence - (or is the root itself). If multiple words are equally high in the - tree, the first word is taken. For example: + EXAMPLE: The root token has the shortest path to the root of the + sentence (or is the root itself). If multiple words are equally + high in the tree, the first word is taken. 
For example: >>> toks = nlp(u'I like New York in Autumn.') @@ -437,11 +427,11 @@ cdef class Span: if self.doc.c[i].head == 0: return self.doc[i] # If we don't have a sentence root, we do something that's not so - # algorithmically clever, but I think should be quite fast, especially - # for short spans. + # algorithmically clever, but I think should be quite fast, + # especially for short spans. # For each word, we count the path length, and arg min this measure. - # We could use better tree logic to save steps here...But I think this - # should be okay. + # We could use better tree logic to save steps here...But I + # think this should be okay. cdef int current_best = self.doc.length cdef int root = -1 for i in range(self.start, self.end): @@ -463,7 +453,7 @@ cdef class Span: YIELDS (Token):A left-child of a token of the span. """ def __get__(self): - for token in reversed(self): # Reverse, so we get the tokens in order + for token in reversed(self): # Reverse, so we get tokens in order for left in token.lefts: if left.i < self.start: yield left @@ -480,6 +470,22 @@ cdef class Span: if right.i >= self.end: yield right + property n_lefts: + """RETURNS (int): The number of leftward immediate children of the + span, in the syntactic dependency parse. + """ + # TODO: implement + def __get__(self): + raise NotImplementedError + + property n_rights: + """RETURNS (int): The number of rightward immediate children of the + span, in the syntactic dependency parse. + """ + # TODO: implement + def __get__(self): + raise NotImplementedError + property subtree: """Tokens that descend from tokens in the span, but fall outside it. @@ -493,66 +499,55 @@ cdef class Span: yield from word.subtree property ent_id: - """An (integer) entity ID. Usually assigned by patterns in the `Matcher`. - - RETURNS (uint64): The entity ID. - """ + """RETURNS (uint64): The entity ID.""" def __get__(self): return self.root.ent_id def __set__(self, hash_t key): - # TODO raise NotImplementedError( - "Can't yet set ent_id from Span. Vote for this feature on the issue " - "tracker: http://github.com/explosion/spaCy/issues") + "Can't yet set ent_id from Span. Vote for this feature on " + "the issue tracker: http://github.com/explosion/spaCy/issues") property ent_id_: - """A (string) entity ID. Usually assigned by patterns in the `Matcher`. - - RETURNS (unicode): The entity ID. - """ + """RETURNS (unicode): The (string) entity ID.""" def __get__(self): return self.root.ent_id_ def __set__(self, hash_t key): - # TODO raise NotImplementedError( - "Can't yet set ent_id_ from Span. Vote for this feature on the issue " - "tracker: http://github.com/explosion/spaCy/issues") + "Can't yet set ent_id_ from Span. Vote for this feature on the " + "issue tracker: http://github.com/explosion/spaCy/issues") property orth_: - # TODO: docstring + """Verbatim text content (identical to Span.text). Exists mostly for + consistency with other attributes. + + RETURNS (unicode): The span's text.""" def __get__(self): - return ''.join([t.string for t in self]).strip() + return ''.join([t.orth_ for t in self]).strip() property lemma_: - """The span's lemma. - - RETURNS (unicode): The span's lemma. - """ + """RETURNS (unicode): The span's lemma.""" def __get__(self): return ' '.join([t.lemma_ for t in self]).strip() property upper_: - # TODO: docstring + """Deprecated. 
Use Span.text.upper() instead.""" def __get__(self): - return ''.join([t.string.upper() for t in self]).strip() + return ''.join([t.text_with_ws.upper() for t in self]).strip() property lower_: - # TODO: docstring + """Deprecated. Use Span.text.lower() instead.""" def __get__(self): - return ''.join([t.string.lower() for t in self]).strip() + return ''.join([t.text_with_ws.lower() for t in self]).strip() property string: - # TODO: docstring + """Deprecated: Use Span.text_with_ws instead.""" def __get__(self): - return ''.join([t.string for t in self]) + return ''.join([t.text_with_ws for t in self]) property label_: - """The span's label. - - RETURNS (unicode): The span's label. - """ + """RETURNS (unicode): The span's label.""" def __get__(self): return self.doc.vocab.strings[self.label] @@ -570,7 +565,8 @@ cdef int _count_words_to_root(const TokenC* token, int sent_length) except -1: n += 1 if n >= sent_length: raise RuntimeError( - "Array bounds exceeded while searching for root word. This likely " - "means the parse tree is in an invalid state. Please report this " - "issue here: http://github.com/explosion/spaCy/issues") + "Array bounds exceeded while searching for root word. This " + "likely means the parse tree is in an invalid state. Please " + "report this issue here: " + "http://github.com/explosion/spaCy/issues") return n diff --git a/spacy/tokens/token.pyx b/spacy/tokens/token.pyx index 514934ca7..fa07d0e9e 100644 --- a/spacy/tokens/token.pyx +++ b/spacy/tokens/token.pyx @@ -14,17 +14,18 @@ from ..typedefs cimport hash_t from ..lexeme cimport Lexeme from .. import parts_of_speech from ..attrs cimport IS_ALPHA, IS_ASCII, IS_DIGIT, IS_LOWER, IS_PUNCT, IS_SPACE -from ..attrs cimport IS_BRACKET, IS_QUOTE, IS_LEFT_PUNCT, IS_RIGHT_PUNCT, IS_OOV -from ..attrs cimport IS_TITLE, IS_UPPER, LIKE_URL, LIKE_NUM, LIKE_EMAIL, IS_STOP -from ..attrs cimport ID, ORTH, NORM, LOWER, SHAPE, PREFIX, SUFFIX, LENGTH, CLUSTER -from ..attrs cimport LEMMA, POS, TAG, DEP +from ..attrs cimport IS_BRACKET, IS_QUOTE, IS_LEFT_PUNCT, IS_RIGHT_PUNCT +from ..attrs cimport IS_OOV, IS_TITLE, IS_UPPER, LIKE_URL, LIKE_NUM, LIKE_EMAIL +from ..attrs cimport IS_STOP, ID, ORTH, NORM, LOWER, SHAPE, PREFIX, SUFFIX +from ..attrs cimport LENGTH, CLUSTER, LEMMA, POS, TAG, DEP from ..compat import is_config from .. import about from .underscore import Underscore cdef class Token: - """An individual token – i.e. a word, punctuation symbol, whitespace, etc.""" + """An individual token – i.e. a word, punctuation symbol, whitespace, + etc.""" @classmethod def set_extension(cls, name, default=None, method=None, getter=None, setter=None): @@ -144,37 +145,33 @@ cdef class Token: return self.doc.user_token_hooks['similarity'](self) if self.vector_norm == 0 or other.vector_norm == 0: return 0.0 - return numpy.dot(self.vector, other.vector) / (self.vector_norm * other.vector_norm) + return (numpy.dot(self.vector, other.vector) / + (self.vector_norm * other.vector_norm)) property lex_id: - """ID of the token's lexical type. - - RETURNS (int): ID of the token's lexical type.""" + """RETURNS (int): Sequential ID of the token's lexical type.""" def __get__(self): return self.c.lex.id property rank: - # TODO: add docstring + """RETURNS (int): Sequential ID of the token's lexical type, used to + index into tables, e.g. 
for word vectors.""" def __get__(self): return self.c.lex.id property string: + """Deprecated: Use Token.text_with_ws instead.""" def __get__(self): return self.text_with_ws property text: - """A unicode representation of the token text. - - RETURNS (unicode): The original verbatim text of the token. - """ + """RETURNS (unicode): The original verbatim text of the token.""" def __get__(self): return self.orth_ property text_with_ws: - """The text content of the token with a trailing whitespace character if - it has one. - - RETURNS (unicode): The text content of the span (with trailing whitespace). + """RETURNS (unicode): The text content of the span (with trailing + whitespace). """ def __get__(self): cdef unicode orth = self.vocab.strings[self.c.lex.orth] @@ -184,74 +181,104 @@ cdef class Token: return orth property prob: + """RETURNS (float): Smoothed log probability estimate of token type.""" def __get__(self): return self.c.lex.prob property sentiment: + """RETURNS (float): A scalar value indicating the positivity or + negativity of the token.""" def __get__(self): if 'sentiment' in self.doc.user_token_hooks: return self.doc.user_token_hooks['sentiment'](self) return self.c.lex.sentiment property lang: + """RETURNS (uint64): ID of the language of the parent document's + vocabulary. + """ def __get__(self): return self.c.lex.lang property idx: + """RETURNS (int): The character offset of the token within the parent + document. + """ def __get__(self): return self.c.idx property cluster: + """RETURNS (int): Brown cluster ID.""" def __get__(self): return self.c.lex.cluster property orth: + """RETURNS (uint64): ID of the verbatim text content.""" def __get__(self): return self.c.lex.orth property lower: + """RETURNS (uint64): ID of the lowercase token text.""" def __get__(self): return self.c.lex.lower property norm: + """RETURNS (uint64): ID of the token's norm, i.e. a normalised form of + the token text. Usually set in the language's tokenizer exceptions + or norm exceptions. + """ def __get__(self): return self.c.lex.norm property shape: + """RETURNS (uint64): ID of the token's shape, a transform of the + tokens's string, to show orthographic features (e.g. "Xxxx", "dd"). + """ def __get__(self): return self.c.lex.shape property prefix: + """RETURNS (uint64): ID of a length-N substring from the start of the + token. Defaults to `N=1`. + """ def __get__(self): return self.c.lex.prefix property suffix: + """RETURNS (uint64): ID of a length-N substring from the end of the + token. Defaults to `N=3`. + """ def __get__(self): return self.c.lex.suffix property lemma: - """Base form of the word, with no inflectional suffixes. - - RETURNS (uint64): Token lemma. + """RETURNS (uint64): ID of the base form of the word, with no + inflectional suffixes. 
""" def __get__(self): return self.c.lemma + def __set__(self, attr_t lemma): self.c.lemma = lemma property pos: + """RETURNS (uint64): ID of coarse-grained part-of-speech tag.""" def __get__(self): return self.c.pos property tag: + """RETURNS (uint64): ID of fine-grained part-of-speech tag.""" def __get__(self): return self.c.tag + def __set__(self, attr_t tag): self.vocab.morphology.assign_tag(self.c, tag) property dep: + """RETURNS (uint64): ID of syntactic dependency label.""" def __get__(self): return self.c.dep + def __set__(self, attr_t label): self.c.dep = label @@ -292,23 +319,29 @@ cdef class Token: return numpy.sqrt((vector ** 2).sum()) property n_lefts: + """RETURNS (int): The number of leftward immediate children of the + word, in the syntactic dependency parse. + """ def __get__(self): return self.c.l_kids property n_rights: + """RETURNS (int): The number of rightward immediate children of the + word, in the syntactic dependency parse. + """ def __get__(self): return self.c.r_kids property sent_start: + # TODO: fix and document def __get__(self): return self.c.sent_start def __set__(self, value): if self.doc.is_parsed: raise ValueError( - 'Refusing to write to token.sent_start if its document is parsed, ' - 'because this may cause inconsistent state. ' - 'See https://github.com/spacy-io/spaCy/issues/235 for workarounds.') + "Refusing to write to token.sent_start if its document " + "is parsed, because this may cause inconsistent state.") if value is None: self.c.sent_start = 0 elif value is True: @@ -316,15 +349,16 @@ cdef class Token: elif value is False: self.c.sent_start = -1 else: - raise ValueError("Invalid value for token.sent_start -- must be one of " - "None, True, False") + raise ValueError("Invalid value for token.sent_start. Must be " + "one of: None, True, False") property lefts: + """The leftward immediate children of the word, in the syntactic + dependency parse. + + YIELDS (Token): A left-child of the token. + """ def __get__(self): - """ - The leftward immediate children of the word, in the syntactic - dependency parse. - """ cdef int nr_iter = 0 cdef const TokenC* ptr = self.c - (self.i - self.c.l_edge) while ptr < self.c: @@ -334,15 +368,16 @@ cdef class Token: nr_iter += 1 # This is ugly, but it's a way to guard out infinite loops if nr_iter >= 10000000: - raise RuntimeError( - "Possibly infinite loop encountered while looking for token.lefts") + raise RuntimeError("Possibly infinite loop encountered " + "while looking for token.lefts") property rights: + """The rightward immediate children of the word, in the syntactic + dependency parse. + + YIELDS (Token): A right-child of the token. + """ def __get__(self): - """ - The rightward immediate children of the word, in the syntactic - dependency parse. - """ cdef const TokenC* ptr = self.c + (self.c.r_edge - self.i) tokens = [] cdef int nr_iter = 0 @@ -352,27 +387,26 @@ cdef class Token: ptr -= 1 nr_iter += 1 if nr_iter >= 10000000: - raise RuntimeError( - "Possibly infinite loop encountered while looking for token.rights") + raise RuntimeError("Possibly infinite loop encountered " + "while looking for token.rights") tokens.reverse() for t in tokens: yield t property children: - """ - A sequence of the token's immediate syntactic children. + """A sequence of the token's immediate syntactic children. 
- Yields: Token A child token such that child.head==self + YIELDS (Token): A child token such that child.head==self """ def __get__(self): yield from self.lefts yield from self.rights property subtree: - """ - A sequence of all the token's syntactic descendents. + """A sequence of all the token's syntactic descendents. - Yields: Token A descendent token such that self.is_ancestor(descendent) + YIELDS (Token): A descendent token such that + `self.is_ancestor(descendent)`. """ def __get__(self): for word in self.lefts: @@ -422,18 +456,17 @@ cdef class Token: """ if self.doc is not descendant.doc: return False - return any( ancestor.i == self.i for ancestor in descendant.ancestors ) + return any(ancestor.i == self.i for ancestor in descendant.ancestors) property head: """The syntactic parent, or "governor", of this token. - RETURNS (Token): The token head. + RETURNS (Token): The token predicted by the parser to be the head of + the current token. """ def __get__(self): - """The token predicted by the parser to be the head of the current - token. - """ return self.doc[self.i + self.c.head] + def __set__(self, Token new_head): # this function sets the head of self to new_head # and updates the counters for left/right dependents @@ -453,16 +486,18 @@ cdef class Token: cdef Token anc, child # update number of deps of old head - if self.c.head > 0: # left dependent + if self.c.head > 0: # left dependent old_head.c.l_kids -= 1 if self.c.l_edge == old_head.c.l_edge: - # the token dominates the left edge so the left edge of the head - # may change when the token is reattached - # it may not change if the new head is a descendant of the current head + # the token dominates the left edge so the left edge of + # the head may change when the token is reattached, it may + # not change if the new head is a descendant of the current + # head new_edge = self.c.l_edge - # the new l_edge is the left-most l_edge on any of the other dependents - # where the l_edge is left of the head, otherwise it is the head + # the new l_edge is the left-most l_edge on any of the + # other dependents where the l_edge is left of the head, + # otherwise it is the head if not is_desc: new_edge = old_head.i for child in old_head.children: @@ -472,14 +507,15 @@ cdef class Token: new_edge = child.c.l_edge old_head.c.l_edge = new_edge - # walk up the tree from old_head and assign new l_edge to ancestors - # until an ancestor already has an l_edge that's further left + # walk up the tree from old_head and assign new l_edge to + # ancestors until an ancestor already has an l_edge that's + # further left for anc in old_head.ancestors: if anc.c.l_edge <= new_edge: break anc.c.l_edge = new_edge - elif self.c.head < 0: # right dependent + elif self.c.head < 0: # right dependent old_head.c.r_kids -= 1 # do the same thing as for l_edge if self.c.r_edge == old_head.c.r_edge: @@ -500,7 +536,7 @@ cdef class Token: anc.c.r_edge = new_edge # update number of deps of new head - if rel_newhead_i > 0: # left dependent + if rel_newhead_i > 0: # left dependent new_head.c.l_kids += 1 # walk up the tree from new head and set l_edge to self.l_edge # until you hit a token with an l_edge further to the left @@ -511,7 +547,7 @@ cdef class Token: break anc.c.l_edge = self.c.l_edge - elif rel_newhead_i < 0: # right dependent + elif rel_newhead_i < 0: # right dependent new_head.c.r_kids += 1 # do the same as for l_edge if self.c.r_edge > new_head.c.r_edge: @@ -542,12 +578,10 @@ cdef class Token: yield from word.conjuncts property ent_type: - """Named entity 
type. - - RETURNS (uint64): Named entity type. - """ + """RETURNS (uint64): Named entity type.""" def __get__(self): return self.c.ent_type + def __set__(self, ent_type): self.c.ent_type = ent_type @@ -561,19 +595,17 @@ cdef class Token: return self.c.ent_iob property ent_type_: - """Named entity type. - - RETURNS (unicode): Named entity type. - """ + """RETURNS (unicode): Named entity type.""" def __get__(self): return self.vocab.strings[self.c.ent_type] + def __set__(self, ent_type): self.c.ent_type = self.vocab.strings.add(ent_type) property ent_iob_: """IOB code of named entity tag. "B" means the token begins an entity, - "I" means it is inside an entity, "O" means it is outside an entity, and - "" means no entity tag is set. + "I" means it is inside an entity, "O" means it is outside an entity, + and "" means no entity tag is set. RETURNS (unicode): IOB code of named entity tag. """ @@ -582,10 +614,8 @@ cdef class Token: return iob_strings[self.c.ent_iob] property ent_id: - """ID of the entity the token is an instance of, if any. Usually - assigned by patterns in the Matcher. - - RETURNS (uint64): ID of the entity. + """RETURNS (uint64): ID of the entity the token is an instance of, + if any. """ def __get__(self): return self.c.ent_id @@ -594,10 +624,8 @@ cdef class Token: self.c.ent_id = key property ent_id_: - """ID of the entity the token is an instance of, if any. Usually - assigned by patterns in the Matcher. - - RETURNS (unicode): ID of the entity. + """RETURNS (unicode): ID of the entity the token is an instance of, + if any. """ def __get__(self): return self.vocab.strings[self.c.ent_id] @@ -606,107 +634,192 @@ cdef class Token: self.c.ent_id = self.vocab.strings.add(name) property whitespace_: + """RETURNS (unicode): The trailing whitespace character, if present. + """ def __get__(self): return ' ' if self.c.spacy else '' property orth_: + """RETURNS (unicode): Verbatim text content (identical to + `Token.text`). Exists mostly for consistency with the other + attributes. + """ def __get__(self): return self.vocab.strings[self.c.lex.orth] property lower_: + """RETURNS (unicode): The lowercase token text. Equivalent to + `Token.text.lower()`. + """ def __get__(self): return self.vocab.strings[self.c.lex.lower] property norm_: + """RETURNS (unicode): The token's norm, i.e. a normalised form of the + token text. Usually set in the language's tokenizer exceptions or + norm exceptions. + """ def __get__(self): return self.vocab.strings[self.c.lex.norm] property shape_: + """RETURNS (unicode): Transform of the token's string, to show + orthographic features. For example, "Xxxx" or "dd". + """ def __get__(self): return self.vocab.strings[self.c.lex.shape] property prefix_: + """RETURNS (unicode): A length-N substring from the start of the token. + Defaults to `N=1`. + """ def __get__(self): return self.vocab.strings[self.c.lex.prefix] property suffix_: + """RETURNS (unicode): A length-N substring from the end of the token. + Defaults to `N=3`. + """ def __get__(self): return self.vocab.strings[self.c.lex.suffix] property lang_: + """RETURNS (unicode): Language of the parent document's vocabulary, + e.g. 'en'. + """ def __get__(self): return self.vocab.strings[self.c.lex.lang] property lemma_: - """Base form of the word, with no inflectional suffixes. - - RETURNS (unicode): Token lemma. + """RETURNS (unicode): The token lemma, i.e. the base form of the word, + with no inflectional suffixes.
""" def __get__(self): return self.vocab.strings[self.c.lemma] + def __set__(self, unicode lemma_): self.c.lemma = self.vocab.strings.add(lemma_) property pos_: + """RETURNS (unicode): Coarse-grained part-of-speech tag.""" def __get__(self): return parts_of_speech.NAMES[self.c.pos] property tag_: + """RETURNS (unicode): Fine-grained part-of-speech tag.""" def __get__(self): return self.vocab.strings[self.c.tag] + def __set__(self, tag): self.tag = self.vocab.strings.add(tag) property dep_: + """RETURNS (unicode): The syntactic dependency label.""" def __get__(self): return self.vocab.strings[self.c.dep] + def __set__(self, unicode label): self.c.dep = self.vocab.strings.add(label) property is_oov: - def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_OOV) + """RETURNS (bool): Whether the token is out-of-vocabulary.""" + def __get__(self): + return Lexeme.c_check_flag(self.c.lex, IS_OOV) property is_stop: - def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_STOP) + """RETURNS (bool): Whether the token is a stop word, i.e. part of a + "stop list" defined by the language data. + """ + def __get__(self): + return Lexeme.c_check_flag(self.c.lex, IS_STOP) property is_alpha: - def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_ALPHA) + """RETURNS (bool): Whether the token consists of alpha characters. + Equivalent to `token.text.isalpha()`. + """ + def __get__(self): + return Lexeme.c_check_flag(self.c.lex, IS_ALPHA) property is_ascii: - def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_ASCII) + """RETURNS (bool): Whether the token consists of ASCII characters. + Equivalent to `all(ord(c) < 128 for c in token.text)`. + """ + def __get__(self): + return Lexeme.c_check_flag(self.c.lex, IS_ASCII) property is_digit: - def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_DIGIT) + """RETURNS (bool): Whether the token consists of digits. Equivalent to + `token.text.isdigit()`. + """ + def __get__(self): + return Lexeme.c_check_flag(self.c.lex, IS_DIGIT) property is_lower: - def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_LOWER) + """RETURNS (bool): Whether the token is in lowercase. Equivalent to + `token.text.islower()`. + """ + def __get__(self): + return Lexeme.c_check_flag(self.c.lex, IS_LOWER) + + property is_upper: + """RETURNS (bool): Whether the token is in uppercase. Equivalent to + `token.text.isupper()`. + """ + def __get__(self): + return Lexeme.c_check_flag(self.c.lex, IS_UPPER) property is_title: - def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_TITLE) + """RETURNS (bool): Whether the token is in titlecase. Equivalent to + `token.text.istitle()`. + """ + def __get__(self): + return Lexeme.c_check_flag(self.c.lex, IS_TITLE) property is_punct: - def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_PUNCT) + """RETURNS (bool): Whether the token is punctuation.""" + def __get__(self): + return Lexeme.c_check_flag(self.c.lex, IS_PUNCT) property is_space: - def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_SPACE) + """RETURNS (bool): Whether the token consists of whitespace characters. + Equivalent to `token.text.isspace()`.
+ """ + def __get__(self): + return Lexeme.c_check_flag(self.c.lex, IS_SPACE) property is_bracket: - def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_BRACKET) + """RETURNS (bool): Whether the token is a bracket.""" + def __get__(self): + return Lexeme.c_check_flag(self.c.lex, IS_BRACKET) property is_quote: - def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_QUOTE) + """RETURNS (bool): Whether the token is a quotation mark.""" + def __get__(self): + return Lexeme.c_check_flag(self.c.lex, IS_QUOTE) property is_left_punct: - def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_LEFT_PUNCT) + """RETURNS (bool): Whether the token is a left punctuation mark.""" + def __get__(self): + return Lexeme.c_check_flag(self.c.lex, IS_LEFT_PUNCT) property is_right_punct: - def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_RIGHT_PUNCT) + """RETURNS (bool): Whether the token is a right punctuation mark.""" + def __get__(self): + return Lexeme.c_check_flag(self.c.lex, IS_RIGHT_PUNCT) property like_url: - def __get__(self): return Lexeme.c_check_flag(self.c.lex, LIKE_URL) + """RETURNS (bool): Whether the token resembles a URL.""" + def __get__(self): + return Lexeme.c_check_flag(self.c.lex, LIKE_URL) property like_num: - def __get__(self): return Lexeme.c_check_flag(self.c.lex, LIKE_NUM) + """RETURNS (bool): Whether the token resembles a number, e.g. "10.9", + "10", "ten", etc. + """ + def __get__(self): + return Lexeme.c_check_flag(self.c.lex, LIKE_NUM) property like_email: - def __get__(self): return Lexeme.c_check_flag(self.c.lex, LIKE_EMAIL) + """RETURNS (bool): Whether the token resembles an email address.""" + def __get__(self): + return Lexeme.c_check_flag(self.c.lex, LIKE_EMAIL) diff --git a/spacy/tokens/underscore.py b/spacy/tokens/underscore.py index 6e782647b..d80f50685 100644 --- a/spacy/tokens/underscore.py +++ b/spacy/tokens/underscore.py @@ -1,5 +1,9 @@ +# coding: utf8 +from __future__ import unicode_literals + import functools + class Underscore(object): doc_extensions = {} span_extensions = {} diff --git a/spacy/typedefs.pyx b/spacy/typedefs.pyx index 8b1378917..e69de29bb 100644 --- a/spacy/typedefs.pyx +++ b/spacy/typedefs.pyx @@ -1 +0,0 @@ - diff --git a/spacy/util.py b/spacy/util.py index ca5a40f97..a45d43c47 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -10,25 +10,27 @@ from pathlib import Path import sys import textwrap import random -import numpy -import io -import dill from collections import OrderedDict from thinc.neural._classes.model import Model import functools +from .symbols import ORTH +from .compat import cupy, CudaStream, path2str, basestring_, input_, unicode_ +from .compat import import_file + import msgpack import msgpack_numpy msgpack_numpy.patch() -import ujson - -from .symbols import ORTH -from .compat import cupy, CudaStream, path2str, basestring_, input_, unicode_ -from .compat import copy_array, normalize_string_keys, getattr_, import_file LANGUAGES = {} _data_path = Path(__file__).parent / 'data' +_PRINT_ENV = False + + +def set_env_log(value): + global _PRINT_ENV + _PRINT_ENV = value def get_lang_class(lang): @@ -38,11 +40,12 @@ def get_lang_class(lang): RETURNS (Language): Language class. """ global LANGUAGES - if not lang in LANGUAGES: + if lang not in LANGUAGES: try: module = importlib.import_module('.lang.%s' % lang, 'spacy') except ImportError: - raise ImportError("Can't import language %s from spacy.lang." %lang) + msg = "Can't import language %s from spacy.lang."
+ raise ImportError(msg % lang) LANGUAGES[lang] = getattr(module, module.__all__[0]) return LANGUAGES[lang] @@ -100,14 +103,14 @@ def load_model(name, **overrides): data_path = get_data_path() if not data_path or not data_path.exists(): raise IOError("Can't find spaCy data path: %s" % path2str(data_path)) - if isinstance(name, basestring_): - if name in set([d.name for d in data_path.iterdir()]): # in data dir / shortcut + if isinstance(name, basestring_): # in data dir / shortcut + if name in set([d.name for d in data_path.iterdir()]): return load_model_from_link(name, **overrides) - if is_package(name): # installed as package + if is_package(name): # installed as package return load_model_from_package(name, **overrides) - if Path(name).exists(): # path to model data directory + if Path(name).exists(): # path to model data directory return load_model_from_path(Path(name), **overrides) - elif hasattr(name, 'exists'): # Path or Path-like to model data + elif hasattr(name, 'exists'): # Path or Path-like to model data return load_model_from_path(name, **overrides) raise IOError("Can't find model '%s'" % name) @@ -120,7 +123,7 @@ def load_model_from_link(name, **overrides): except AttributeError: raise IOError( "Cant' load '%s'. If you're using a shortcut link, make sure it " - "points to a valid model package (not just a data directory)." % name) + "points to a valid package (not just a data directory)." % name) return cls.load(**overrides) @@ -164,7 +167,8 @@ def load_model_from_init_py(init_file, **overrides): data_dir = '%s_%s-%s' % (meta['lang'], meta['name'], meta['version']) data_path = model_path / data_dir if not model_path.exists(): - raise ValueError("Can't find model directory: %s" % path2str(data_path)) + msg = "Can't find model directory: %s" + raise ValueError(msg % path2str(data_path)) return load_model_from_path(data_path, meta, **overrides) @@ -176,14 +180,16 @@ def get_model_meta(path): """ model_path = ensure_path(path) if not model_path.exists(): - raise ValueError("Can't find model directory: %s" % path2str(model_path)) + msg = "Can't find model directory: %s" + raise ValueError(msg % path2str(model_path)) meta_path = model_path / 'meta.json' if not meta_path.is_file(): raise IOError("Could not read meta.json from %s" % meta_path) meta = read_json(meta_path) for setting in ['lang', 'name', 'version']: if setting not in meta or not meta[setting]: - raise ValueError("No valid '%s' setting found in model meta.json" % setting) + msg = "No valid '%s' setting found in model meta.json" + raise ValueError(msg % setting) return meta @@ -240,7 +246,7 @@ def get_async(stream, numpy_array): return numpy_array else: array = cupy.ndarray(numpy_array.shape, order='C', - dtype=numpy_array.dtype) + dtype=numpy_array.dtype) array.set(numpy_array, stream=stream) return array @@ -274,12 +280,6 @@ def itershuffle(iterable, bufsize=1000): raise StopIteration -_PRINT_ENV = False -def set_env_log(value): - global _PRINT_ENV - _PRINT_ENV = value - - def env_opt(name, default=None): if type(default) is float: type_convert = float @@ -305,17 +305,20 @@ def read_regex(path): path = ensure_path(path) with path.open() as file_: entries = file_.read().split('\n') - expression = '|'.join(['^' + re.escape(piece) for piece in entries if piece.strip()]) + expression = '|'.join(['^' + re.escape(piece) + for piece in entries if piece.strip()]) return re.compile(expression) def compile_prefix_regex(entries): if '(' in entries: # Handle deprecated data - expression = '|'.join(['^' + re.escape(piece) for piece 
in entries if piece.strip()]) + expression = '|'.join(['^' + re.escape(piece) + for piece in entries if piece.strip()]) return re.compile(expression) else: - expression = '|'.join(['^' + piece for piece in entries if piece.strip()]) + expression = '|'.join(['^' + piece + for piece in entries if piece.strip()]) return re.compile(expression) @@ -359,16 +362,15 @@ def update_exc(base_exceptions, *addition_dicts): exc = dict(base_exceptions) for additions in addition_dicts: for orth, token_attrs in additions.items(): - if not all(isinstance(attr[ORTH], unicode_) for attr in token_attrs): - msg = "Invalid value for ORTH in exception: key='%s', orths='%s'" + if not all(isinstance(attr[ORTH], unicode_) + for attr in token_attrs): + msg = "Invalid ORTH value in exception: key='%s', orths='%s'" raise ValueError(msg % (orth, token_attrs)) described_orth = ''.join(attr[ORTH] for attr in token_attrs) if orth != described_orth: - raise ValueError("Invalid tokenizer exception: ORTH values " - "combined don't match original string. " - "key='%s', orths='%s'" % (orth, described_orth)) - # overlap = set(exc.keys()).intersection(set(additions)) - # assert not overlap, overlap + msg = ("Invalid tokenizer exception: ORTH values combined " + "don't match original string. key='%s', orths='%s'") + raise ValueError(msg % (orth, described_orth)) exc.update(additions) exc = expand_exc(exc, "'", "’") return exc @@ -401,17 +403,15 @@ def normalize_slice(length, start, stop, step=None): raise ValueError("Stepped slices not supported in Span objects." "Try: list(tokens)[start:stop:step] instead.") if start is None: - start = 0 + start = 0 elif start < 0: - start += length + start += length start = min(length, max(0, start)) - if stop is None: - stop = length + stop = length elif stop < 0: - stop += length + stop += length stop = min(length, max(start, stop)) - assert 0 <= start <= stop <= length return start, stop @@ -428,7 +428,7 @@ def compounding(start, stop, compound): >>> assert next(sizes) == 1.5 * 1.5 """ def clip(value): - return max(value, stop) if (start>stop) else min(value, stop) + return max(value, stop) if (start > stop) else min(value, stop) curr = float(start) while True: yield clip(curr) @@ -438,7 +438,7 @@ def compounding(start, stop, compound): def decaying(start, stop, decay): """Yield an infinite series of linearly decaying values.""" def clip(value): - return max(value, stop) if (start>stop) else min(value, stop) + return max(value, stop) if (start > stop) else min(value, stop) nr_upd = 1. while True: yield clip(start * 1./(1. + decay * nr_upd)) @@ -530,17 +530,19 @@ def print_markdown(data, title=None): if isinstance(data, dict): data = list(data.items()) - markdown = ["* **{}:** {}".format(l, unicode_(v)) for l, v in data if not excl_value(v)] + markdown = ["* **{}:** {}".format(l, unicode_(v)) + for l, v in data if not excl_value(v)] if title: print("\n## {}".format(title)) print('\n{}\n'.format('\n'.join(markdown))) def prints(*texts, **kwargs): - """Print formatted message (manual ANSI escape sequences to avoid dependency) + """Print formatted message (manual ANSI escape sequences to avoid + dependency) *texts (unicode): Texts to print. Each argument is rendered as paragraph. - **kwargs: 'title' becomes coloured headline. 'exits'=True performs sys exit. + **kwargs: 'title' becomes coloured headline. exits=True performs sys exit. 
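    EXAMPLE (an illustrative sketch):
        >>> prints('Created output directory.', 'Restart to apply changes.',
        ...        title='Setup complete')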
""" exits = kwargs.get('exits', None) title = kwargs.get('title', None) @@ -570,7 +572,8 @@ def _wrap(text, wrap_max=80, indent=4): def minify_html(html): """Perform a template-specific, rudimentary HTML minification for displaCy. - Disclaimer: NOT a general-purpose solution, only removes indentation/newlines. + Disclaimer: NOT a general-purpose solution, only removes indentation and + newlines. html (unicode): Markup to minify. RETURNS (unicode): "Minified" HTML. diff --git a/spacy/vectors.pyx b/spacy/vectors.pyx index fa5fcf624..155d7b9d2 100644 --- a/spacy/vectors.pyx +++ b/spacy/vectors.pyx @@ -1,5 +1,6 @@ +# coding: utf8 from __future__ import unicode_literals -from libc.stdint cimport int32_t, uint64_t + import numpy from collections import OrderedDict import msgpack @@ -9,23 +10,20 @@ cimport numpy as np from thinc.neural.util import get_array_module from thinc.neural._classes.model import Model -from .typedefs cimport attr_t from .strings cimport StringStore -from . import util from .compat import basestring_, path2str +from . import util cdef class Vectors: - '''Store, save and load word vectors. + """Store, save and load word vectors. Vectors data is kept in the vectors.data attribute, which should be an - instance of numpy.ndarray (for CPU vectors) - or cupy.ndarray (for GPU vectors). - - vectors.key2row is a dictionary mapping word hashes to rows - in the vectors.data table. The array `vectors.keys` keeps - the keys in order, such that keys[vectors.key2row[key]] == key. - ''' + instance of numpy.ndarray (for CPU vectors) or cupy.ndarray + (for GPU vectors). `vectors.key2row` is a dictionary mapping word hashes to + rows in the vectors.data table. The array `vectors.keys` keeps the keys in + order, such that `keys[vectors.key2row[key]] == key`. + """ cdef public object data cdef readonly StringStore strings cdef public object key2row @@ -33,6 +31,16 @@ cdef class Vectors: cdef public int i def __init__(self, strings, width=0, data=None): + """Create a new vector store. To keep the vector table empty, pass + `width=0`. You can also create the vector table and add vectors one by + one, or set the vector values directly on initialisation. + + strings (StringStore or list): List of strings or StringStore that maps + strings to hash values, and vice versa. + width (int): Number of dimensions. + data (numpy.ndarray): The vector data. + RETURNS (Vectors): The newly created object. + """ if isinstance(strings, StringStore): self.strings = strings else: @@ -55,11 +63,13 @@ cdef class Vectors: return (Vectors, (self.strings, self.data)) def __getitem__(self, key): - '''Get a vector by key. If key is a string, it is hashed - to an integer ID using the vectors.strings table. + """Get a vector by key. If key is a string, it is hashed to an integer + ID using the vectors.strings table. If the integer key is not found in + the table, a KeyError is raised. - If the integer key is not found in the table, a KeyError is raised. - ''' + key (unicode / int): The key to get the vector for. + RETURNS (numpy.ndarray): The vector for the key. + """ if isinstance(key, basestring): key = self.strings[key] i = self.key2row[key] @@ -69,30 +79,47 @@ cdef class Vectors: return self.data[i] def __setitem__(self, key, vector): - '''Set a vector for the given key. If key is a string, it is hashed + """Set a vector for the given key. If key is a string, it is hashed to an integer ID using the vectors.strings table. - ''' + + key (unicode / int): The key to set the vector for. 
+ vector (numpy.ndarray): The vector to set. + """ if isinstance(key, basestring): key = self.strings.add(key) i = self.key2row[key] self.data[i] = vector def __iter__(self): - '''Yield vectors from the table.''' + """Yield vectors from the table. + + YIELDS (numpy.ndarray): A vector. + """ yield from self.data def __len__(self): - '''Return the number of vectors that have been assigned.''' + """Return the number of vectors that have been assigned. + + RETURNS (int): The number of vectors in the data. + """ return self.i def __contains__(self, key): - '''Check whether a key has a vector entry in the table.''' + """Check whether a key has a vector entry in the table. + + key (unicode / int): The key to check. + RETURNS (bool): Whether the key has a vector entry. + """ if isinstance(key, basestring_): key = self.strings[key] return key in self.key2row def add(self, key, vector=None): - '''Add a key to the table, optionally setting a vector value as well.''' + """Add a key to the table, optionally setting a vector value as well. + + key (unicode / int): The key to add. + vector (numpy.ndarray): An optional vector to add. + """ if isinstance(key, basestring_): key = self.strings.add(key) if key not in self.key2row: @@ -110,24 +137,36 @@ cdef class Vectors: return i def items(self): - '''Iterate over (string key, vector) pairs, in order.''' + """Iterate over `(string key, vector)` pairs, in order. + + YIELDS (tuple): A key/vector pair. + """ for i, key in enumerate(self.keys): string = self.strings[key] yield string, self.data[i] @property def shape(self): + """Get `(rows, dims)` tuples of number of rows and number of dimensions + in the vector table. + + RETURNS (tuple): A `(rows, dims)` pair. + """ return self.data.shape def most_similar(self, key): + # TODO: implement raise NotImplementedError def from_glove(self, path): - '''Load GloVe vectors from a directory. Assumes binary format, + """Load GloVe vectors from a directory. Assumes binary format, that the vocab is in a vocab.txt, and that vectors are named vectors.{size}.[fd].bin, e.g. vectors.128.f.bin for 128d float32 vectors, vectors.300.d.bin for 300d float64 (double) vectors, etc. - By default GloVe outputs 64-bit vectors.''' + By default GloVe outputs 64-bit vectors. + + path (unicode / Path): The path to load the GloVe vectors from. + """ path = util.ensure_path(path) for name in path.iterdir(): if name.parts[-1].startswith('vectors'): @@ -150,9 +189,15 @@ cdef class Vectors: self.data def to_disk(self, path, **exclude): + """Save the current state to a directory. + + path (unicode / Path): A path to a directory, which will be created if + it doesn't exists. Either a string or a Path-like object. + """ xp = get_array_module(self.data) if xp is numpy: - save_array = lambda arr, file_: xp.save(file_, arr, allow_pickle=False) + save_array = lambda arr, file_: xp.save(file_, arr, + allow_pickle=False) else: save_array = lambda arr, file_: xp.save(file_, arr) serializers = OrderedDict(( @@ -162,6 +207,12 @@ cdef class Vectors: return util.to_disk(path, serializers, exclude) def from_disk(self, path, **exclude): + """Loads state from a directory. Modifies the object in place and + returns it. + + path (unicode / Path): Directory path, string or Path-like object. + RETURNS (Vectors): The modified object. + """ def load_keys(path): if path.exists(): self.keys = numpy.load(path2str(path)) @@ -182,6 +233,11 @@ cdef class Vectors: return self def to_bytes(self, **exclude): + """Serialize the current state to a binary string. 
+ + **exclude: Named attributes to prevent from being serialized. + RETURNS (bytes): The serialized form of the `Vectors` object. + """ def serialize_weights(): if hasattr(self.data, 'to_bytes'): return self.data.to_bytes() @@ -194,6 +250,12 @@ cdef class Vectors: return util.to_bytes(serializers, exclude) def from_bytes(self, data, **exclude): + """Load state from a binary string. + + data (bytes): The data to load from. + **exclude: Named attributes to prevent from being loaded. + RETURNS (Vectors): The `Vectors` object. + """ def deserialize_weights(b): if hasattr(self.data, 'from_bytes'): self.data.from_bytes() diff --git a/spacy/vocab.pyx b/spacy/vocab.pyx index 38286cb90..8b09d7ee7 100644 --- a/spacy/vocab.pyx +++ b/spacy/vocab.pyx @@ -1,32 +1,24 @@ # coding: utf8 from __future__ import unicode_literals -import ujson -import re import numpy import dill -from libc.string cimport memset, memcpy -from libc.stdint cimport int32_t -from libc.math cimport sqrt -from cymem.cymem cimport Address from collections import OrderedDict from .lexeme cimport EMPTY_LEXEME from .lexeme cimport Lexeme from .strings cimport hash_string from .typedefs cimport attr_t from .tokens.token cimport Token -from .attrs cimport PROB, LANG +from .attrs cimport PROB, LANG, ORTH, TAG from .structs cimport SerializedLexemeC -from .compat import copy_reg, pickle, basestring_ +from .compat import copy_reg, basestring_ from .lemmatizer import Lemmatizer from .attrs import intify_attrs from .vectors import Vectors -from . import util -from . import attrs -from . import symbols from ._ml import link_vectors_to_models +from . import util cdef class Vocab: @@ -35,23 +27,22 @@ cdef class Vocab: C-data that is shared between `Doc` objects. """ def __init__(self, lex_attr_getters=None, tag_map=None, lemmatizer=None, - strings=tuple(), **deprecated_kwargs): + strings=tuple(), **deprecated_kwargs): """Create the vocabulary. - lex_attr_getters (dict): A dictionary mapping attribute IDs to functions - to compute them. Defaults to `None`. - tag_map (dict): A dictionary mapping fine-grained tags to coarse-grained + lex_attr_getters (dict): A dictionary mapping attribute IDs to + functions to compute them. Defaults to `None`. + tag_map (dict): Dictionary mapping fine-grained tags to coarse-grained parts-of-speech, and optionally morphological attributes. lemmatizer (object): A lemmatizer. Defaults to `None`. strings (StringStore): StringStore that maps strings to integers, and vice versa. - RETURNS (Vocab): The newly constructed vocab object. + RETURNS (Vocab): The newly constructed object. """ lex_attr_getters = lex_attr_getters if lex_attr_getters is not None else {} tag_map = tag_map if tag_map is not None else {} if lemmatizer in (None, True, False): lemmatizer = Lemmatizer({}, {}, {}) - self.mem = Pool() self._by_hash = PreshMap() self._by_orth = PreshMap() @@ -83,19 +74,20 @@ cdef class Vocab: The flag_getter function will be called over the words currently in the vocab, and then applied to new words as they occur. You'll then be able - to access the flag value on each token, using token.check_flag(flag_id). + to access the flag value on each token using token.check_flag(flag_id). See also: `Lexeme.set_flag`, `Lexeme.check_flag`, `Token.set_flag`, `Token.check_flag`. - flag_getter (callable): A function `f(unicode) -> bool`, to get the flag - value. + flag_getter (callable): A function `f(unicode) -> bool`, to get the + flag value. 
flag_id (int): An integer between 1 and 63 (inclusive), specifying the bit at which the flag will be stored. If -1, the lowest available bit will be chosen. RETURNS (int): The integer ID by which the flag value can be checked. EXAMPLE: - >>> MY_PRODUCT = nlp.vocab.add_flag(lambda text: text in ['spaCy', 'dislaCy']) + >>> my_product_getter = lambda text: text in ['spaCy', 'dislaCy'] + >>> MY_PRODUCT = nlp.vocab.add_flag(my_product_getter) >>> doc = nlp(u'I like spaCy') >>> assert doc[2].check_flag(MY_PRODUCT) == True """ @@ -106,9 +98,10 @@ cdef class Vocab: break else: raise ValueError( - "Cannot find empty bit for new lexical flag. All bits between " - "0 and 63 are occupied. You can replace one by specifying the " - "flag_id explicitly, e.g. nlp.vocab.add_flag(your_func, flag_id=IS_ALPHA") + "Cannot find empty bit for new lexical flag. All bits " + "between 0 and 63 are occupied. You can replace one by " + "specifying the flag_id explicitly, e.g. " + "`nlp.vocab.add_flag(your_func, flag_id=IS_ALPHA`.") elif flag_id >= 64 or flag_id < 1: raise ValueError( "Invalid value for flag_id: %d. Flag IDs must be between " @@ -119,9 +112,9 @@ cdef class Vocab: return flag_id cdef const LexemeC* get(self, Pool mem, unicode string) except NULL: - """Get a pointer to a `LexemeC` from the lexicon, creating a new `Lexeme` - if necessary, using memory acquired from the given pool. If the pool - is the lexicon's own memory, the lexeme is saved in the lexicon. + """Get a pointer to a `LexemeC` from the lexicon, creating a new + `Lexeme` if necessary using memory acquired from the given pool. If the + pool is the lexicon's own memory, the lexeme is saved in the lexicon. """ if string == u'': return &EMPTY_LEXEME @@ -138,9 +131,9 @@ cdef class Vocab: return self._new_lexeme(mem, string) cdef const LexemeC* get_by_orth(self, Pool mem, attr_t orth) except NULL: - """Get a pointer to a `LexemeC` from the lexicon, creating a new `Lexeme` - if necessary, using memory acquired from the given pool. If the pool - is the lexicon's own memory, the lexeme is saved in the lexicon. + """Get a pointer to a `LexemeC` from the lexicon, creating a new + `Lexeme` if necessary using memory acquired from the given pool. If the + pool is the lexicon's own memory, the lexeme is saved in the lexicon. """ if orth == 0: return &EMPTY_LEXEME @@ -202,8 +195,8 @@ cdef class Vocab: for orth, addr in self._by_orth.items(): yield Lexeme(self, orth) - def __getitem__(self, id_or_string): - """Retrieve a lexeme, given an int ID or a unicode string. If a + def __getitem__(self, id_or_string): + """Retrieve a lexeme, given an int ID or a unicode string. If a previously unseen unicode string is given, a new lexeme is created and stored. 
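# Usage sketch for the lookup behaviour documented above. Assumes a loaded
# pipeline `nlp`; the word 'coffee' is an arbitrary example.
#     lexeme = nlp.vocab[u'coffee']                      # unicode string key
#     assert nlp.vocab[lexeme.orth].orth_ == u'coffee'   # integer ID key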
@@ -228,13 +221,14 @@ cdef class Vocab: cdef int i tokens = self.mem.alloc(len(substrings) + 1, sizeof(TokenC)) for i, props in enumerate(substrings): - props = intify_attrs(props, strings_map=self.strings, _do_deprecated=True) + props = intify_attrs(props, strings_map=self.strings, + _do_deprecated=True) token = &tokens[i] # Set the special tokens up to have arbitrary attributes - lex = self.get_by_orth(self.mem, props[attrs.ORTH]) + lex = self.get_by_orth(self.mem, props[ORTH]) token.lex = lex - if attrs.TAG in props: - self.morphology.assign_tag(token, props[attrs.TAG]) + if TAG in props: + self.morphology.assign_tag(token, props[TAG]) for attr_id, value in props.items(): Token.set_struct_attr(token, attr_id, value) Lexeme.set_struct_attr(lex, attr_id, value) @@ -253,16 +247,13 @@ cdef class Vocab: self.vectors = Vectors(self.strings, width=new_dim) def get_vector(self, orth): - """Retrieve a vector for a word in the vocabulary. + """Retrieve a vector for a word in the vocabulary. Words can be looked + up by string or int ID. If no vectors data is loaded, ValueError is + raised. - Words can be looked up by string or int ID. - - RETURNS: - A word vector. Size and shape determined by the - vocab.vectors instance. Usually, a numpy ndarray - of shape (300,) and dtype float32. - - RAISES: If no vectors data is loaded, ValueError is raised. + RETURNS (numpy.ndarray): A word vector. Size + and shape determined by the `vocab.vectors` instance. Usually, a + numpy ndarray of shape (300,) and dtype float32. """ if isinstance(orth, basestring_): orth = self.strings.add(orth) @@ -272,21 +263,16 @@ cdef class Vocab: return numpy.zeros((self.vectors_length,), dtype='f') def set_vector(self, orth, vector): - """Set a vector for a word in the vocabulary. - - Words can be referenced by string or int ID. - - RETURNS: - None + """Set a vector for a word in the vocabulary. Words can be referenced + by string or int ID. """ if not isinstance(orth, basestring_): orth = self.strings[orth] self.vectors.add(orth, vector=vector) def has_vector(self, orth): - """Check whether a word has a vector. Returns False if no - vectors have been loaded. Words can be looked up by string - or int ID.""" + """Check whether a word has a vector. Returns False if no vectors have + been loaded. Words can be looked up by string or int ID.""" if isinstance(orth, basestring_): orth = self.strings.add(orth) return orth in self.vectors @@ -295,7 +281,7 @@ cdef class Vocab: """Save the current state to a directory. path (unicode or Path): A path to a directory, which will be created if - it doesn't exist. Paths may be either strings or `Path`-like objects. + it doesn't exist. Paths may be either strings or Path-like objects. """ path = util.ensure_path(path) if not path.exists(): @@ -420,16 +406,13 @@ def pickle_vocab(vocab): length = vocab.length data_dir = vocab.data_dir lex_attr_getters = dill.dumps(vocab.lex_attr_getters) - lexemes_data = vocab.lexemes_to_bytes() - return (unpickle_vocab, - (sstore, morph, data_dir, lex_attr_getters, - lexemes_data, length)) + (sstore, morph, data_dir, lex_attr_getters, lexemes_data, length)) def unpickle_vocab(sstore, morphology, data_dir, - lex_attr_getters, bytes lexemes_data, int length): + lex_attr_getters, bytes lexemes_data, int length): cdef Vocab vocab = Vocab() vocab.length = length vocab.strings = sstore @@ -449,12 +432,10 @@ class LookupError(Exception): @classmethod def mismatched_strings(cls, id_, id_string, original_string): return cls( - "Error fetching a Lexeme from the Vocab. 
When looking up a string, " - "the lexeme returned had an orth ID that did not match the query string. " - "This means that the cached lexeme structs are mismatched to the " - "string encoding table. The mismatched:\n" - "Query string: {query}\n" - "Orth cached: {orth_str}\n" - "ID of orth: {orth_id}".format( - query=repr(original_string), orth_str=repr(id_string), orth_id=id_) - ) + "Error fetching a Lexeme from the Vocab. When looking up a " + "string, the lexeme returned had an orth ID that did not match " + "the query string. This means that the cached lexeme structs are " + "mismatched to the string encoding table. The mismatched:\n" + "Query string: {}\n" + "Orth cached: {}\n" + "Orth ID: {}".format(repr(original_string), repr(id_string), id_)) diff --git a/website/api/_top-level/_cli.jade b/website/api/_top-level/_cli.jade index fc573e0ec..f19eb43d0 100644 --- a/website/api/_top-level/_cli.jade +++ b/website/api/_top-level/_cli.jade @@ -134,11 +134,12 @@ p p | Convert files into spaCy's #[+a("/api/annotation#json-input") JSON format] | for use with the #[code train] command and other experiment management - | functions. The right converter is chosen based on the file extension of - | the input file. Currently only supports #[code .conllu]. + | functions. The converter can be specified on the command line, or + | chosen based on the file extension of the input file. +code(false, "bash", "$", false, false, true). - spacy convert [input_file] [output_dir] [--n-sents] [--morphology] + spacy convert [input_file] [output_dir] [--converter] [--n-sents] + [--morphology] +table(["Argument", "Type", "Description"]) +row @@ -151,6 +152,11 @@ p +cell positional +cell Output directory for converted JSON file. + +row + +cell #[code converter], #[code -c] + +cell option + +cell #[+tag-new(2)] Name of converter to use (see below). + +row +cell #[code --n-sents], #[code -n] +cell option @@ -166,6 +172,25 @@ p +cell flag +cell Show help message and available arguments. +p The following converters are available: + ++table(["ID", "Description"]) + +row + +cell #[code auto] + +cell Automatically pick converter based on file extension (default). + + +row + +cell #[code conllu], #[code conll] + +cell Universal Dependencies #[code .conllu] or #[code .conll] format. + + +row + +cell #[code ner] + +cell Tab-based named entity recognition format. + + +row + +cell #[code iob] + +cell IOB named entity recognition format. + +h(3, "train") Train p diff --git a/website/api/doc.jade b/website/api/doc.jade index ceb564c7a..ac91ad427 100644 --- a/website/api/doc.jade +++ b/website/api/doc.jade @@ -332,6 +332,26 @@ p +cell dict +cell A dictionary mapping attributes to integer counts. ++h(2, "get_lca_matrix") Doc.get_lca_matrix + +tag method + +p + | Calculates the lowest common ancestor matrix for a given #[code Doc]. + | Returns LCA matrix containing the integer index of the ancestor, or + | #[code -1] if no common ancestor is found, e.g. if span excludes a + | necessary ancestor. + ++aside-code("Example"). + doc = nlp(u"This is a test") + matrix = doc.get_lca_matrix() + # array([[0, 1, 1, 1], [1, 1, 1, 1], [1, 1, 2, 3], [1, 1, 3, 3]], dtype=int32) + ++table(["Name", "Type", "Description"]) + +row("foot") + +cell returns + +cell #[code.u-break numpy.ndarray[ndim=2, dtype='int32']] + +cell The lowest common ancestor matrix of the #[code Doc]. + +h(2, "to_array") Doc.to_array +tag method @@ -764,3 +784,10 @@ p +cell | A dictionary that allows customisation of properties of | #[code Span] children. 
+ + +row + +cell #[code _] + +cell #[code Underscore] + +cell + | User space for adding custom + | #[+a("/usage/processing-pipelines#custom-components-attributes") attribute extensions]. diff --git a/website/api/lexeme.jade b/website/api/lexeme.jade index dddefd2d7..86fa18730 100644 --- a/website/api/lexeme.jade +++ b/website/api/lexeme.jade @@ -157,27 +157,61 @@ p The L2 norm of the lexeme's vector representation. +row +cell #[code vocab] +cell #[code Vocab] - +cell + +cell The lexeme's vocabulary. +row +cell #[code text] +cell unicode +cell Verbatim text content. + +row + +cell #[code orth] + +cell int + +cell ID of the verbatim text content. + + +row + +cell #[code orth_] + +cell unicode + +cell + | Verbatim text content (identical to #[code Lexeme.text]). Existst + | mostly for consistency with the other attributes. + +row +cell #[code lex_id] +cell int +cell ID of the lexeme's lexical type. + +row + +cell #[code rank] + +cell int + +cell + | Sequential ID of the lexemes's lexical type, used to index into + | tables, e.g. for word vectors. + + +row + +cell #[code flags] + +cell int + +cell Container of the lexeme's binary flags. + + +row + +cell #[code norm] + +cell int + +cell The lexemes's norm, i.e. a normalised form of the lexeme text. + + +row + +cell #[code norm_] + +cell unicode + +cell The lexemes's norm, i.e. a normalised form of the lexeme text. + +row +cell #[code lower] +cell int - +cell Lower-case form of the word. + +cell Lowercase form of the word. +row +cell #[code lower_] +cell unicode - +cell Lower-case form of the word. + +cell Lowercase form of the word. +row +cell #[code shape] @@ -192,22 +226,30 @@ p The L2 norm of the lexeme's vector representation. +row +cell #[code prefix] +cell int - +cell Length-N substring from the start of the word. Defaults to #[code N=1]. + +cell + | Length-N substring from the start of the word. Defaults to + | #[code N=1]. +row +cell #[code prefix_] +cell unicode - +cell Length-N substring from the start of the word. Defaults to #[code N=1]. + +cell + | Length-N substring from the start of the word. Defaults to + | #[code N=1]. +row +cell #[code suffix] +cell int - +cell Length-N substring from the end of the word. Defaults to #[code N=3]. + +cell + | Length-N substring from the end of the word. Defaults to + | #[code N=3]. +row +cell #[code suffix_] +cell unicode - +cell Length-N substring from the start of the word. Defaults to #[code N=3]. + +cell + | Length-N substring from the start of the word. Defaults to + | #[code N=3]. +row +cell #[code is_alpha] @@ -237,6 +279,13 @@ p The L2 norm of the lexeme's vector representation. | Is the lexeme in lowercase? Equivalent to | #[code lexeme.text.islower()]. + +row + +cell #[code is_upper] + +cell bool + +cell + | Is the lexeme in uppercase? Equivalent to + | #[code lexeme.text.isupper()]. + +row +cell #[code is_title] +cell bool @@ -249,6 +298,16 @@ p The L2 norm of the lexeme's vector representation. +cell bool +cell Is the lexeme punctuation? + +row + +cell #[code is_left_punct] + +cell bool + +cell Is the lexeme a left punctuation mark, e.g. #[code (]? + + +row + +cell #[code is_right_punct] + +cell bool + +cell Is the lexeme a right punctuation mark, e.g. #[code )]? + +row +cell #[code is_space] +cell bool @@ -256,6 +315,16 @@ p The L2 norm of the lexeme's vector representation. | Does the lexeme consist of whitespace characters? Equivalent to | #[code lexeme.text.isspace()]. + +row + +cell #[code is_bracket] + +cell bool + +cell Is the lexeme a bracket? 
+ + +row + +cell #[code is_quote] + +cell bool + +cell Is the lexeme a quotation mark? + +row +cell #[code like_url] +cell bool @@ -285,6 +354,7 @@ p The L2 norm of the lexeme's vector representation. +cell #[code lang] +cell int +cell Language of the parent vocabulary. + +row +cell #[code lang_] +cell unicode @@ -293,9 +363,16 @@ p The L2 norm of the lexeme's vector representation. +row +cell #[code prob] +cell float - +cell Smoothed log probability estimate of lexeme's type. + +cell Smoothed log probability estimate of the lexeme's type. + + +row + +cell #[code cluster] + +cell int + +cell Brown cluster ID. +row +cell #[code sentiment] +cell float - +cell A scalar value indicating the positivity or negativity of the lexeme. + +cell + | A scalar value indicating the positivity or negativity of the + | lexeme. diff --git a/website/api/span.jade b/website/api/span.jade index 2a55409f1..266518076 100644 --- a/website/api/span.jade +++ b/website/api/span.jade @@ -248,6 +248,28 @@ p +cell float +cell A scalar similarity score. Higher is more similar. ++h(2, "get_lca_matrix") Span.get_lca_matrix + +tag method + +p + | Calculates the lowest common ancestor matrix for a given #[code Span]. + | Returns LCA matrix containing the integer index of the ancestor, or + | #[code -1] if no common ancestor is found, e.g. if span excludes a + | necessary ancestor. + ++aside-code("Example"). + doc = nlp(u'I like New York in Autumn') + span = doc[1:4] + matrix = span.get_lca_matrix() + # array([[0, 0, 0], [0, 1, 2], [0, 2, 2]], dtype=int32) + ++table(["Name", "Type", "Description"]) + +row("foot") + +cell returns + +cell #[code.u-break numpy.ndarray[ndim=2, dtype='int32']] + +cell The lowest common ancestor matrix of the #[code Span]. + + +h(2, "to_array") Span.to_array +tag method +tag-new(2) @@ -347,7 +369,7 @@ p +tag property +tag-model("parse") -p Tokens that are to the left of the span, whose head is within the span. +p Tokens that are to the left of the span, whose heads are within the span. +aside-code("Example"). doc = nlp(u'I like New York in Autumn.') @@ -364,7 +386,7 @@ p Tokens that are to the left of the span, whose head is within the span. +tag property +tag-model("parse") -p Tokens that are to the right of the span, whose head is within the span. +p Tokens that are to the right of the span, whose heads are within the span. +aside-code("Example"). doc = nlp(u'I like New York in Autumn.') @@ -377,6 +399,42 @@ p Tokens that are to the right of the span, whose head is within the span. +cell #[code Token] +cell A right-child of a token of the span. ++h(2, "n_lefts") Span.n_lefts + +tag property + +tag-model("parse") + +p + | The number of tokens that are to the left of the span, whose heads are + | within the span. + ++aside-code("Example"). + doc = nlp(u'I like New York in Autumn.') + assert doc[3:7].n_lefts == 1 + ++table(["Name", "Type", "Description"]) + +row("foot") + +cell returns + +cell int + +cell The number of left-child tokens. + ++h(2, "n_rights") Span.n_rights + +tag property + +tag-model("parse") + +p + | The number of tokens that are to the right of the span, whose heads are + | within the span. + ++aside-code("Example"). + doc = nlp(u'I like New York in Autumn.') + assert doc[2:4].n_rights == 1 + ++table(["Name", "Type", "Description"]) + +row("foot") + +cell returns + +cell int + +cell The number of right-child tokens. 
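The new span-level `n_lefts`, `n_rights` and `get_lca_matrix` entries above mirror the token-level API. A short consolidated sketch, assuming a parsed English pipeline is loaded; the model name is an assumption, and the exact children printed depend on the parse.

# Sketch combining the Span attributes documented above. Requires a model
# with a dependency parser; the model name is an assumption.
import spacy

nlp = spacy.load('en_core_web_sm')
doc = nlp(u'I like New York in Autumn.')
span = doc[1:4]  # "like New York"

# lefts/rights: tokens to the left/right of the span whose heads are
# inside the span; n_lefts/n_rights are their counts.
print([t.text for t in span.lefts])
print([t.text for t in span.rights])
print(span.n_lefts)
print(span.n_rights)

# Entry [i, j] is the index (within the span) of the lowest common
# ancestor of span[i] and span[j], or -1 if they share none in the span.
matrix = span.get_lca_matrix()
print(matrix.shape)  # (3, 3), dtype int32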
+ +h(2, "subtree") Span.subtree +tag property +tag-model("parse") @@ -495,6 +553,18 @@ p | The text content of the span with a trailing whitespace character | if the last token has one. + +row + +cell #[code orth] + +cell int + +cell ID of the verbatim text content. + + +row + +cell #[code orth_] + +cell unicode + +cell + | Verbatim text content (identical to #[code Span.text]). Existst + | mostly for consistency with the other attributes. + +row +cell #[code label] +cell int @@ -519,3 +589,17 @@ p +cell #[code ent_id_] +cell unicode +cell The string ID of the named entity the token is an instance of. + + +row + +cell #[code sentiment] + +cell float + +cell + | A scalar value indicating the positivity or negativity of the + | span. + + +row + +cell #[code _] + +cell #[code Underscore] + +cell + | User space for adding custom + | #[+a("/usage/processing-pipelines#custom-components-attributes") attribute extensions]. diff --git a/website/api/token.jade b/website/api/token.jade index 4062594b4..f8fa15fe8 100644 --- a/website/api/token.jade +++ b/website/api/token.jade @@ -302,6 +302,80 @@ p A sequence of the token's immediate syntactic children. +cell #[code Token] +cell A child token such that #[code child.head==self]. ++h(2, "lefts") Token.lefts + +tag property + +tag-model("parse") + +p + | The leftward immediate children of the word, in the syntactic dependency + | parse. + ++aside-code("Example"). + doc = nlp(u'I like New York in Autumn.') + lefts = [t.text for t in doc[3].lefts] + assert lefts == [u'New'] + ++table(["Name", "Type", "Description"]) + +row("foot") + +cell yields + +cell #[code Token] + +cell A left-child of the token. + ++h(2, "rights") Token.rights + +tag property + +tag-model("parse") + +p + | The rightward immediate children of the word, in the syntactic + | dependency parse. + ++aside-code("Example"). + doc = nlp(u'I like New York in Autumn.') + rights = [t.text for t in doc[3].rights] + assert rights == [u'in'] + ++table(["Name", "Type", "Description"]) + +row("foot") + +cell yields + +cell #[code Token] + +cell A right-child of the token. + ++h(2, "n_lefts") Token.n_lefts + +tag property + +tag-model("parse") + +p + | The number of leftward immediate children of the word, in the syntactic + | dependency parse. + ++aside-code("Example"). + doc = nlp(u'I like New York in Autumn.') + assert doc[3].n_lefts == 1 + ++table(["Name", "Type", "Description"]) + +row("foot") + +cell returns + +cell int + +cell The number of left-child tokens. + ++h(2, "n_rights") Token.n_rights + +tag property + +tag-model("parse") + +p + | The number of rightward immediate children of the word, in the syntactic + | dependency parse. + ++aside-code("Example"). + doc = nlp(u'I like New York in Autumn.') + assert doc[3].n_rights == 1 + ++table(["Name", "Type", "Description"]) + +row("foot") + +cell returns + +cell int + +cell The number of right-child tokens. + +h(2, "subtree") Token.subtree +tag property +tag-model("parse") @@ -489,15 +563,35 @@ p The L2 norm of the token's vector representation. +cell unicode +cell Base form of the token, with no inflectional suffixes. + +row + +cell #[code norm] + +cell int + +cell + | The token's norm, i.e. a normalised form of the token text. + | Usually set in the language's + | #[+a("/usage/adding-languages#tokenizer-exceptions") tokenizer exceptions] or + | #[+a("/usage/adding-languages#norm-exceptions") norm exceptions]. + + +row + +cell #[code norm_] + +cell unicode + +cell + | The token's norm, i.e. a normalised form of the token text. 
+ | Usually set in the language's + | #[+a("/usage/adding-languages#tokenizer-exceptions") tokenizer exceptions] or + | #[+a("/usage/adding-languages#norm-exceptions") norm exceptions]. + +row +cell #[code lower] +cell int - +cell Lower-case form of the token. + +cell Lowercase form of the token. +row +cell #[code lower_] +cell unicode - +cell Lower-case form of the token. + +cell + | Lowercase form of the token text. Equivalent to + | #[code Token.text.lower()]. +row +cell #[code shape] @@ -537,7 +631,9 @@ p The L2 norm of the token's vector representation. +row +cell #[code suffix_] +cell unicode - +cell Length-N substring from the end of the token. Defaults to #[code N=3]. + +cell + | Length-N substring from the end of the token. Defaults to + | #[code N=3]. +row +cell #[code is_alpha] @@ -672,6 +768,7 @@ p The L2 norm of the token's vector representation. +cell #[code lang] +cell int +cell Language of the parent document's vocabulary. + +row +cell #[code lang_] +cell unicode @@ -690,9 +787,30 @@ p The L2 norm of the token's vector representation. +row +cell #[code sentiment] +cell float - +cell A scalar value indicating the positivity or negativity of the token. + +cell + | A scalar value indicating the positivity or negativity of the + | token. +row +cell #[code lex_id] +cell int - +cell ID of the token's lexical type. + +cell Sequential ID of the token's lexical type. + + +row + +cell #[code rank] + +cell int + +cell + | Sequential ID of the token's lexical type, used to index into + | tables, e.g. for word vectors. + + +row + +cell #[code cluster] + +cell int + +cell Brown cluster ID. + + +row + +cell #[code _] + +cell #[code Underscore] + +cell + | User space for adding custom + | #[+a("/usage/processing-pipelines#custom-components-attributes") attribute extensions]. diff --git a/website/api/vectors.jade b/website/api/vectors.jade index e08f34643..692bd1ca8 100644 --- a/website/api/vectors.jade +++ b/website/api/vectors.jade @@ -36,12 +36,14 @@ p | that maps strings to hash values, and vice versa. +row - +cell #[code data] - +cell #[code.u-break numpy.ndarray[ndim=1, dtype='float32']] + +cell #[code width] + +cell int + +cell Number of dimensions. +row - +cell #[code width] - +cell Number of dimensions. + +cell #[code data] + +cell #[code.u-break numpy.ndarray[ndim=1, dtype='float32']] + +cell The vector data. +row("foot") +cell returns @@ -208,7 +210,7 @@ p +row("foot") +cell returns +cell tuple - +cell #[code (rows, dims)] pairs. + +cell A #[code (rows, dims)] pair. +h(2, "from_glove") Vectors.from_glove +tag method @@ -238,11 +240,16 @@ p Save the current state to a directory. +table(["Name", "Type", "Description"]) +row +cell #[code path] - +cell unicode or #[code Path] + +cell unicode / #[code Path] +cell | A path to a directory, which will be created if it doesn't exist. | Paths may be either strings or #[code Path]-like objects. + +row + +cell #[code **exclude] + +cell - + +cell Named attributes to prevent from being saved. + +h(2, "from_disk") Vectors.from_disk +tag method @@ -255,7 +262,7 @@ p Loads state from a directory. Modifies the object in place and returns it. +table(["Name", "Type", "Description"]) +row +cell #[code path] - +cell unicode or #[code Path] + +cell unicode / #[code Path] +cell | A path to a directory. Paths may be either strings or | #[code Path]-like objects. @@ -297,7 +304,7 @@ p Load state from a binary string. +table(["Name", "Type", "Description"]) +row - +cell #[code bytes_data] + +cell #[code data] +cell bytes +cell The data to load from. 
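To round off the `Vectors` serialization changes above, here is a minimal round-trip sketch. The constructor and `add()` arguments follow the `(strings, width)` signature used elsewhere in this diff and may differ in other releases; treat them as an assumption.

# Round-trip sketch for the Vectors serialization API documented above.
# Constructor and add() arguments follow the signatures used in this diff
# and are an assumption for other releases.
import numpy
from spacy.strings import StringStore
from spacy.vectors import Vectors

vectors = Vectors(StringStore(), width=3)
vectors.add(u'apple', vector=numpy.asarray([1.0, 2.0, 3.0], dtype='f'))

# to_bytes() returns the serialized table; from_bytes(data) loads it into
# an existing instance and returns that instance. **exclude can be used to
# skip named attributes in either direction.
data = vectors.to_bytes()
restored = Vectors(StringStore(), width=3).from_bytes(data)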
diff --git a/website/usage/_linguistic-features/_dependency-parse.jade b/website/usage/_linguistic-features/_dependency-parse.jade index 85d9179df..0fcdd4713 100644 --- a/website/usage/_linguistic-features/_dependency-parse.jade +++ b/website/usage/_linguistic-features/_dependency-parse.jade @@ -111,11 +111,13 @@ p p | A few more convenience attributes are provided for iterating around the - | local tree from the token. The #[code .lefts] and #[code .rights] - | attributes provide sequences of syntactic children that occur before and - | after the token. Both sequences are in sentences order. There are also - | two integer-typed attributes, #[code .n_rights] and #[code .n_lefts], - | that give the number of left and right children. + | local tree from the token. The #[+api("token#lefts") #[code Token.lefts]] + | and #[+api("token#rights") #[code Token.rights]] attributes provide + | sequences of syntactic children that occur before and after the token. + | Both sequences are in sentence order. There are also two integer-typed + | attributes, #[+api("token#n_rights") #[code Token.n_rights]] and + | #[+api("token#n_lefts") #[code Token.n_lefts]], that give the number of + | left and right children. +code. doc = nlp(u'bright red apples on the tree') @@ -126,10 +128,11 @@ p p | You can get a whole phrase by its syntactic head using the - | #[code .subtree] attribute. This returns an ordered sequence of tokens. - | You can walk up the tree with the #[code .ancestors] attribute, and - | check dominance with the #[+api("token#is_ancestor") #[code .is_ancestor()]] - | method. + | #[+api("token#subtree") #[code Token.subtree]] attribute. This returns an + | ordered sequence of tokens. You can walk up the tree with the + | #[+api("token#ancestors") #[code Token.ancestors]] attribute, and + | check dominance with + | #[+api("token#is_ancestor") #[code Token.is_ancestor()]]. +aside("Projective vs. non-projective") | For the #[+a("/models/en") default English model], the