Merge remote-tracking branch 'origin/develop' into feature/phrasematcher
This commit is contained in: commit d02a41a8c9
requirements.txt

@@ -1,4 +1,4 @@
cython>=0.24
cython>=0.24,<0.27.0
pathlib
numpy>=1.7
cymem>=1.30,<1.32
spacy/_ml.py (93 changed lines)
@@ -4,6 +4,7 @@ from thinc.neural import Model, Maxout, Softmax, Affine
from thinc.neural._classes.hash_embed import HashEmbed
from thinc.neural.ops import NumpyOps, CupyOps
from thinc.neural.util import get_array_module
import thinc.extra.load_nlp
import random
import cytoolz

@@ -31,6 +32,7 @@ from . import util
import numpy
import io

VECTORS_KEY = 'spacy_pretrained_vectors'

@layerize
def _flatten_add_lengths(seqs, pad=0, drop=0.):

@@ -225,42 +227,52 @@ def drop_layer(layer, factor=2.):
model.predict = layer
return model

def link_vectors_to_models(vocab):
vectors = vocab.vectors
ops = Model.ops
for word in vocab:
if word.orth in vectors.key2row:
word.rank = vectors.key2row[word.orth]
else:
word.rank = 0
data = ops.asarray(vectors.data)
# Set an entry here, so that vectors are accessed by StaticVectors
# (unideal, I know)
thinc.extra.load_nlp.VECTORS[(ops.device, VECTORS_KEY)] = data

def Tok2Vec(width, embed_size, pretrained_dims=0):
if pretrained_dims is None:
pretrained_dims = 0

def Tok2Vec(width, embed_size, **kwargs):
pretrained_dims = kwargs.get('pretrained_dims', 0)
cnn_maxout_pieces = kwargs.get('cnn_maxout_pieces', 3)
cols = [ID, NORM, PREFIX, SUFFIX, SHAPE, ORTH]
with Model.define_operators({'>>': chain, '|': concatenate, '**': clone, '+': add}):
with Model.define_operators({'>>': chain, '|': concatenate, '**': clone, '+': add,
'*': reapply}):
norm = HashEmbed(width, embed_size, column=cols.index(NORM), name='embed_norm')
prefix = HashEmbed(width, embed_size//2, column=cols.index(PREFIX), name='embed_prefix')
suffix = HashEmbed(width, embed_size//2, column=cols.index(SUFFIX), name='embed_suffix')
shape = HashEmbed(width, embed_size//2, column=cols.index(SHAPE), name='embed_shape')
if pretrained_dims is not None and pretrained_dims >= 1:
glove = StaticVectors(VECTORS_KEY, width, column=cols.index(ID))

trained_vectors = (
embed = uniqued(
(glove | norm | prefix | suffix | shape)
>> LN(Maxout(width, width*5, pieces=3)), column=5)
else:
embed = uniqued(
(norm | prefix | suffix | shape)
>> LN(Maxout(width, width*4, pieces=3)), column=5)

convolution = Residual(
ExtractWindow(nW=1)
>> LN(Maxout(width, width*3, pieces=cnn_maxout_pieces))
)

tok2vec = (
FeatureExtracter(cols)
>> with_flatten(
uniqued(
(norm | prefix | suffix | shape)
>> LN(Maxout(width, width*4, pieces=3)), column=5)
)
embed >> (convolution * 4), pad=4)
)
convolution = Residual(ExtractWindow(nW=1) >> LN(Maxout(width, width*3, pieces=3)))

if pretrained_dims >= 1:
embed = concatenate_lists(trained_vectors, SpacyVectors)
tok2vec = (
embed
>> with_flatten(
Affine(width, width+pretrained_dims)
>> convolution ** 4,
pad=4)
)
else:
embed = trained_vectors
tok2vec = (
embed
>> with_flatten(convolution ** 4, pad=4)
)

# Work around thinc API limitations :(. TODO: Revise in Thinc 7
tok2vec.nO = width

@@ -268,6 +280,28 @@ def Tok2Vec(width, embed_size, pretrained_dims=0):
return tok2vec

def reapply(layer, n_times):
def reapply_fwd(X, drop=0.):
backprops = []
for i in range(n_times):
Y, backprop = layer.begin_update(X, drop=drop)
X = Y
backprops.append(backprop)
def reapply_bwd(dY, sgd=None):
dX = None
for backprop in reversed(backprops):
dY = backprop(dY, sgd=sgd)
if dX is None:
dX = dY
else:
dX += dY
return dX
return Y, reapply_bwd
return wrap(reapply_fwd, layer)

def asarray(ops, dtype):
def forward(X, drop=0.):
return ops.asarray(X, dtype=dtype), None

@@ -471,8 +505,13 @@ def getitem(i):
return X[i], None
return layerize(getitem_fwd)

def build_tagger_model(nr_class, token_vector_width, pretrained_dims=0, **cfg):
def build_tagger_model(nr_class, **cfg):
embed_size = util.env_opt('embed_size', 4000)
if 'token_vector_width' in cfg:
token_vector_width = cfg['token_vector_width']
else:
token_vector_width = util.env_opt('token_vector_width', 128)
pretrained_dims = cfg.get('pretrained_dims', 0)
with Model.define_operators({'>>': chain, '+': add}):
# Input: (doc, tensor) tuples
private_tok2vec = Tok2Vec(token_vector_width, embed_size,
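Not part of the commit, for orientation only: a minimal sketch of how the reworked keyword-based signatures above might be called, assuming this branch of spaCy is importable. The width, embed_size and cfg values are illustrative, not defaults taken from the diff.

    from spacy._ml import Tok2Vec, build_tagger_model

    # Hyper-parameters now travel in a cfg dict and are read via kwargs.get(...)
    cfg = {'pretrained_dims': 0, 'cnn_maxout_pieces': 3}
    tok2vec = Tok2Vec(width=128, embed_size=4000, **cfg)

    # build_tagger_model() now takes token_vector_width from cfg (or an
    # environment option) instead of a positional argument.
    tagger = build_tagger_model(50, token_vector_width=128, **cfg)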
spacy/cli/train.py
@@ -8,6 +8,7 @@ import cytoolz
from pathlib import Path
import dill
import tqdm
from thinc.neural._classes.model import Model
from thinc.neural.optimizers import linear_decay
from timeit import default_timer as timer

@@ -17,6 +18,7 @@ from ..gold import GoldParse, merge_sents
from ..gold import GoldCorpus, minibatch
from ..util import prints
from .. import util
from .. import about
from .. import displacy
from ..compat import json_dumps

@@ -29,15 +31,16 @@ from ..compat import json_dumps
n_iter=("number of iterations", "option", "n", int),
n_sents=("number of sentences", "option", "ns", int),
use_gpu=("Use GPU", "option", "g", int),
resume=("Whether to resume training", "flag", "R", bool),
vectors=("Model to load vectors from", "option", "v"),
no_tagger=("Don't train tagger", "flag", "T", bool),
no_parser=("Don't train parser", "flag", "P", bool),
no_entities=("Don't train NER", "flag", "N", bool),
gold_preproc=("Use gold preprocessing", "flag", "G", bool),
meta_path=("Optional path to meta.json. All relevant properties will be overwritten.", "option", "m", Path)
)
def train(cmd, lang, output_dir, train_data, dev_data, n_iter=20, n_sents=0,
use_gpu=-1, resume=False, no_tagger=False, no_parser=False, no_entities=False,
gold_preproc=False):
use_gpu=-1, vectors=None, no_tagger=False, no_parser=False, no_entities=False,
gold_preproc=False, meta_path=None):
"""
Train a model. Expects data in spaCy's JSON format.
"""

@@ -46,13 +49,19 @@ def train(cmd, lang, output_dir, train_data, dev_data, n_iter=20, n_sents=0,
output_path = util.ensure_path(output_dir)
train_path = util.ensure_path(train_data)
dev_path = util.ensure_path(dev_data)
meta_path = util.ensure_path(meta_path)
if not output_path.exists():
output_path.mkdir()
if not train_path.exists():
prints(train_path, title="Training data not found", exits=1)
if dev_path and not dev_path.exists():
prints(dev_path, title="Development data not found", exits=1)

if meta_path is not None and not meta_path.exists():
prints(meta_path, title="meta.json not found", exits=1)
meta = util.read_json(meta_path) if meta_path else {}
if not isinstance(meta, dict):
prints("Expected dict but got: {}".format(type(meta)),
title="Not a valid meta.json format", exits=1)

pipeline = ['token_vectors', 'tags', 'dependencies', 'entities']
if no_tagger and 'tags' in pipeline: pipeline.remove('tags')

@@ -69,26 +78,23 @@ def train(cmd, lang, output_dir, train_data, dev_data, n_iter=20, n_sents=0,
batch_sizes = util.compounding(util.env_opt('batch_from', 1),
util.env_opt('batch_to', 64),
util.env_opt('batch_compound', 1.001))

if not resume:
lang_class = util.get_lang_class(lang)
nlp = lang_class(pipeline=pipeline)
else:
print("Load resume")
nlp = _resume_model(lang, pipeline)
lang_class = nlp.__class__

corpus = GoldCorpus(train_path, dev_path, limit=n_sents)
n_train_words = corpus.count_train()

lang_class = util.get_lang_class(lang)
nlp = lang_class(pipeline=pipeline)
if vectors:
util.load_model(vectors, vocab=nlp.vocab)
optimizer = nlp.begin_training(lambda: corpus.train_tuples, device=use_gpu)
nlp._optimizer = None

print("Itn.\tLoss\tUAS\tNER P.\tNER R.\tNER F.\tTag %\tToken %")
try:
train_docs = corpus.train_docs(nlp, projectivize=True, noise_level=0.0,
gold_preproc=gold_preproc, max_length=0)
train_docs = list(train_docs)
for i in range(n_iter):
with tqdm.tqdm(total=n_train_words, leave=False) as pbar:
train_docs = corpus.train_docs(nlp, projectivize=True, noise_level=0.0,
gold_preproc=gold_preproc, max_length=0)
losses = {}
for batch in minibatch(train_docs, size=batch_sizes):
docs, golds = zip(*batch)

@@ -103,32 +109,30 @@ def train(cmd, lang, output_dir, train_data, dev_data, n_iter=20, n_sents=0,
nlp.to_disk(epoch_model_path)
nlp_loaded = lang_class(pipeline=pipeline)
nlp_loaded = nlp_loaded.from_disk(epoch_model_path)
scorer = nlp_loaded.evaluate(
scorer = nlp.evaluate(
corpus.dev_docs(
nlp_loaded,
nlp,
gold_preproc=gold_preproc))
acc_loc =(output_path / ('model%d' % i) / 'accuracy.json')
with acc_loc.open('w') as file_:
file_.write(json_dumps(scorer.scores))
meta_loc = output_path / ('model%d' % i) / 'meta.json'
meta['accuracy'] = scorer.scores
meta['lang'] = nlp.lang
meta['pipeline'] = pipeline
meta['spacy_version'] = '>=%s' % about.__version__
meta.setdefault('name', 'model%d' % i)
meta.setdefault('version', '0.0.0')

with meta_loc.open('w') as file_:
file_.write(json_dumps(meta))
util.set_env_log(True)
print_progress(i, losses, scorer.scores)
finally:
print("Saving model...")
with (output_path / 'model-final.pickle').open('wb') as file_:
with nlp.use_params(optimizer.averages):
dill.dump(nlp, file_, -1)

def _resume_model(lang, pipeline):
nlp = util.load_model(lang)
pipes = {getattr(pipe, 'name', None) for pipe in nlp.pipeline}
for name in pipeline:
if name not in pipes:
factory = nlp.Defaults.factories[name]
nlp.pipeline.extend(factory(nlp))
nlp.meta['pipeline'] = pipeline
return nlp

try:
with (output_path / 'model-final.pickle').open('wb') as file_:
with nlp.use_params(optimizer.averages):
dill.dump(nlp, file_, -1)
except:
pass

def _render_parses(i, to_render):
to_render[0].user_data['title'] = "Batch %d" % i
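Not part of the commit: a sketch of what the new vectors option (-v) does before training starts, namely loading another model's vectors into the pipeline's vocab via util.load_model(..., vocab=...). Assumes a spaCy install from this branch; 'en_vectors' is a placeholder package name.

    from spacy import util

    lang_class = util.get_lang_class('en')
    nlp = lang_class(pipeline=['tags', 'dependencies', 'entities'])
    # Pull vectors from another installed model into this pipeline's vocab,
    # as train() now does when the vectors option is given.
    util.load_model('en_vectors', vocab=nlp.vocab)
    optimizer = nlp.begin_training(None, device=-1)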
spacy/language.py
@@ -342,7 +342,27 @@ class Language(object):
for doc, gold in docs_golds:
yield doc, gold

def begin_training(self, get_gold_tuples, **cfg):
def resume_training(self, **cfg):
if cfg.get('device', -1) >= 0:
device = util.use_gpu(cfg['device'])
if self.vocab.vectors.data.shape[1] >= 1:
self.vocab.vectors.data = Model.ops.asarray(
self.vocab.vectors.data)
else:
device = None
learn_rate = util.env_opt('learn_rate', 0.001)
beta1 = util.env_opt('optimizer_B1', 0.9)
beta2 = util.env_opt('optimizer_B2', 0.999)
eps = util.env_opt('optimizer_eps', 1e-08)
L2 = util.env_opt('L2_penalty', 1e-6)
max_grad_norm = util.env_opt('grad_norm_clip', 1.)
self._optimizer = Adam(Model.ops, learn_rate, L2=L2, beta1=beta1,
beta2=beta2, eps=eps)
self._optimizer.max_grad_norm = max_grad_norm
self._optimizer.device = device
return self._optimizer

def begin_training(self, get_gold_tuples=None, **cfg):
"""Allocate models, pre-process training data and acquire a trainer and
optimizer. Used as a contextmanager.

@@ -353,17 +373,14 @@ class Language(object):
if self.parser:
self.pipeline.append(NeuralLabeller(self.vocab))
# Populate vocab
for _, annots_brackets in get_gold_tuples():
for annots, _ in annots_brackets:
for word in annots[1]:
_ = self.vocab[word]
if get_gold_tuples is not None:
for _, annots_brackets in get_gold_tuples():
for annots, _ in annots_brackets:
for word in annots[1]:
_ = self.vocab[word]
contexts = []
if cfg.get('device', -1) >= 0:
import cupy.cuda.device
device = cupy.cuda.device.Device(cfg['device'])
device.use()
Model.ops = CupyOps()
Model.Ops = CupyOps
device = util.use_gpu(cfg['device'])
if self.vocab.vectors.data.shape[1] >= 1:
self.vocab.vectors.data = Model.ops.asarray(
self.vocab.vectors.data)
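Not part of the commit: a usage sketch for the new Language.resume_training() entry point and the now-optional gold tuples in begin_training(), assuming a blank English pipeline from this branch.

    from spacy.lang.en import English

    nlp = English()
    # Keep the existing weights and just get a fresh Adam optimizer back:
    optimizer = nlp.resume_training(device=-1)
    # Gold tuples may now be omitted when starting training from scratch:
    # optimizer = nlp.begin_training(None, device=-1)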
spacy/pipeline.pyx
@@ -43,11 +43,12 @@ from .compat import json_dumps
from .attrs import ID, LOWER, PREFIX, SUFFIX, SHAPE, TAG, DEP, POS
from ._ml import rebatch, Tok2Vec, flatten
from ._ml import build_text_classifier, build_tagger_model
from ._ml import link_vectors_to_models
from .parts_of_speech import X

class SentenceSegmenter(object):
'''A simple spaCy hook, to allow custom sentence boundary detection logic
"""A simple spaCy hook, to allow custom sentence boundary detection logic
(that doesn't require the dependency parse).

To change the sentence boundary detection strategy, pass a generator

@@ -56,7 +57,7 @@ class SentenceSegmenter(object):

Sentence detection strategies should be generators that take `Doc` objects
and yield `Span` objects for each sentence.
'''
"""
name = 'sbd'

def __init__(self, vocab, strategy=None):

@@ -88,17 +89,30 @@ class BaseThincComponent(object):

@classmethod
def Model(cls, *shape, **kwargs):
"""Initialize a model for the pipe."""
raise NotImplementedError

def __init__(self, vocab, model=True, **cfg):
"""Create a new pipe instance."""
raise NotImplementedError

def __call__(self, doc):
"""Apply the pipe to one document. The document is
modified in-place, and returned.

Both __call__ and pipe should delegate to the `predict()`
and `set_annotations()` methods.
"""
scores = self.predict([doc])
self.set_annotations([doc], scores)
return doc

def pipe(self, stream, batch_size=128, n_threads=-1):
"""Apply the pipe to a stream of documents.

Both __call__ and pipe should delegate to the `predict()`
and `set_annotations()` methods.
"""
for docs in cytoolz.partition_all(batch_size, stream):
docs = list(docs)
scores = self.predict(docs)

@@ -106,27 +120,43 @@ class BaseThincComponent(object):
yield from docs

def predict(self, docs):
"""Apply the pipeline's model to a batch of docs, without
modifying them.
"""
raise NotImplementedError

def set_annotations(self, docs, scores):
"""Modify a batch of documents, using pre-computed scores."""
raise NotImplementedError

def update(self, docs_tensors, golds, state=None, drop=0., sgd=None, losses=None):
def update(self, docs, golds, drop=0., sgd=None, losses=None):
"""Learn from a batch of documents and gold-standard information,
updating the pipe's model.

Delegates to predict() and get_loss().
"""
raise NotImplementedError

def get_loss(self, docs, golds, scores):
"""Find the loss and gradient of loss for the batch of
documents and their predicted scores."""
raise NotImplementedError

def begin_training(self, gold_tuples=tuple(), pipeline=None):
token_vector_width = pipeline[0].model.nO
"""Initialize the pipe for training, using data exampes if available.
If no model has been initialized yet, the model is added."""
if self.model is True:
self.model = self.Model(1, token_vector_width)
self.model = self.Model(**self.cfg)
link_vectors_to_models(self.vocab)

def use_params(self, params):
"""Modify the pipe's model, to use the given parameter values.
"""
with self.model.use_params(params):
yield

def to_bytes(self, **exclude):
"""Serialize the pipe to a bytestring."""
serialize = OrderedDict((
('cfg', lambda: json_dumps(self.cfg)),
('model', lambda: self.model.to_bytes()),

@@ -135,6 +165,7 @@ class BaseThincComponent(object):
return util.to_bytes(serialize, exclude)

def from_bytes(self, bytes_data, **exclude):
"""Load the pipe from a bytestring."""
def load_model(b):
if self.model is True:
self.cfg['pretrained_dims'] = self.vocab.vectors_length

@@ -143,21 +174,23 @@ class BaseThincComponent(object):

deserialize = OrderedDict((
('cfg', lambda b: self.cfg.update(ujson.loads(b))),
('model', load_model),
('vocab', lambda b: self.vocab.from_bytes(b))
('model', load_model),
))
util.from_bytes(bytes_data, deserialize, exclude)
return self

def to_disk(self, path, **exclude):
"""Serialize the pipe to disk."""
serialize = OrderedDict((
('cfg', lambda p: p.open('w').write(json_dumps(self.cfg))),
('vocab', lambda p: self.vocab.to_disk(p)),
('model', lambda p: p.open('wb').write(self.model.to_bytes())),
('vocab', lambda p: self.vocab.to_disk(p))
))
util.to_disk(path, serialize, exclude)

def from_disk(self, path, **exclude):
"""Load the pipe from disk."""
def load_model(p):
if self.model is True:
self.cfg['pretrained_dims'] = self.vocab.vectors_length

@@ -166,8 +199,8 @@ class BaseThincComponent(object):

deserialize = OrderedDict((
('cfg', lambda p: self.cfg.update(_load_cfg(p))),
('model', load_model),
('vocab', lambda p: self.vocab.from_disk(p)),
('model', load_model),
))
util.from_disk(path, deserialize, exclude)
return self

@@ -215,6 +248,7 @@ class TokenVectorEncoder(BaseThincComponent):
self.model = model
self.cfg = dict(cfg)
self.cfg['pretrained_dims'] = self.vocab.vectors.data.shape[1]
self.cfg.setdefault('cnn_maxout_pieces', 3)

def __call__(self, doc):
"""Add context-sensitive vectors to a `Doc`, e.g. from a CNN or LSTM

@@ -286,9 +320,9 @@ class TokenVectorEncoder(BaseThincComponent):
pipeline (list): The pipeline the model is part of.
"""
if self.model is True:
self.model = self.Model(
pretrained_dims=self.vocab.vectors_length,
**self.cfg)
self.cfg['pretrained_dims'] = self.vocab.vectors_length
self.model = self.Model(**self.cfg)
link_vectors_to_models(self.vocab)

class NeuralTagger(BaseThincComponent):

@@ -297,6 +331,8 @@ class NeuralTagger(BaseThincComponent):
self.vocab = vocab
self.model = model
self.cfg = dict(cfg)
self.cfg.setdefault('cnn_maxout_pieces', 2)
self.cfg.setdefault('pretrained_dims', self.vocab.vectors.data.shape[1])

def __call__(self, doc):
tags = self.predict(([doc], [doc.tensor]))

@@ -393,15 +429,14 @@ class NeuralTagger(BaseThincComponent):
vocab.morphology = Morphology(vocab.strings, new_tag_map,
vocab.morphology.lemmatizer,
exc=vocab.morphology.exc)
token_vector_width = pipeline[0].model.nO
if self.model is True:
self.model = self.Model(self.vocab.morphology.n_tags, token_vector_width,
pretrained_dims=self.vocab.vectors_length)
self.cfg['pretrained_dims'] = self.vocab.vectors.data.shape[1]
self.model = self.Model(self.vocab.morphology.n_tags, **self.cfg)
link_vectors_to_models(self.vocab)

@classmethod
def Model(cls, n_tags, token_vector_width, pretrained_dims=0):
return build_tagger_model(n_tags, token_vector_width,
pretrained_dims)
def Model(cls, n_tags, **cfg):
return build_tagger_model(n_tags, **cfg)

def use_params(self, params):
with self.model.use_params(params):

@@ -422,8 +457,7 @@ class NeuralTagger(BaseThincComponent):
if self.model is True:
token_vector_width = util.env_opt('token_vector_width',
self.cfg.get('token_vector_width', 128))
self.model = self.Model(self.vocab.morphology.n_tags, token_vector_width,
pretrained_dims=self.vocab.vectors_length)
self.model = self.Model(self.vocab.morphology.n_tags, **self.cfg)
self.model.from_bytes(b)

def load_tag_map(b):

@@ -442,6 +476,7 @@ class NeuralTagger(BaseThincComponent):
return self

def to_disk(self, path, **exclude):
self.cfg['pretrained_dims'] = self.vocab.vectors.data.shape[1]
serialize = OrderedDict((
('vocab', lambda p: self.vocab.to_disk(p)),
('tag_map', lambda p: p.open('wb').write(msgpack.dumps(

@@ -456,10 +491,7 @@ class NeuralTagger(BaseThincComponent):
def from_disk(self, path, **exclude):
def load_model(p):
if self.model is True:
token_vector_width = util.env_opt('token_vector_width',
self.cfg.get('token_vector_width', 128))
self.model = self.Model(self.vocab.morphology.n_tags, token_vector_width,
**self.cfg)
self.model = self.Model(self.vocab.morphology.n_tags, **self.cfg)
self.model.from_bytes(p.open('rb').read())

def load_tag_map(p):

@@ -486,6 +518,8 @@ class NeuralLabeller(NeuralTagger):
self.vocab = vocab
self.model = model
self.cfg = dict(cfg)
self.cfg.setdefault('cnn_maxout_pieces', 2)
self.cfg.setdefault('pretrained_dims', self.vocab.vectors.data.shape[1])

@property
def labels(self):

@@ -508,13 +542,13 @@ class NeuralLabeller(NeuralTagger):
self.labels[dep] = len(self.labels)
token_vector_width = pipeline[0].model.nO
if self.model is True:
self.model = self.Model(len(self.labels), token_vector_width,
pretrained_dims=self.vocab.vectors_length)
self.cfg['pretrained_dims'] = self.vocab.vectors.data.shape[1]
self.model = self.Model(len(self.labels), **self.cfg)
link_vectors_to_models(self.vocab)

@classmethod
def Model(cls, n_tags, token_vector_width, pretrained_dims=0):
return build_tagger_model(n_tags, token_vector_width,
pretrained_dims)
def Model(cls, n_tags, **cfg):
return build_tagger_model(n_tags, **cfg)

def get_loss(self, docs, golds, scores):
scores = self.model.ops.flatten(scores)

@@ -562,7 +596,7 @@ class SimilarityHook(BaseThincComponent):
return Siamese(Pooling(max_pool, mean_pool), CauchySimilarity(length))

def __call__(self, doc):
'''Install similarity hook'''
"""Install similarity hook"""
doc.user_hooks['similarity'] = self.predict
return doc

@@ -590,6 +624,7 @@ class SimilarityHook(BaseThincComponent):
"""
if self.model is True:
self.model = self.Model(pipeline[0].model.nO)
link_vectors_to_models(self.vocab)

class TextCategorizer(BaseThincComponent):

@@ -663,6 +698,7 @@ class TextCategorizer(BaseThincComponent):
self.cfg['pretrained_dims'] = self.vocab.vectors_length
self.model = self.Model(len(self.labels), token_vector_width,
**self.cfg)
link_vectors_to_models(self.vocab)

cdef class EntityRecognizer(LinearParser):
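Not part of the commit: the docstrings added to BaseThincComponent above spell out the contract a custom pipe should follow. A minimal sketch of such a subclass with a dummy, model-free predict/set_annotations pair, assuming BaseThincComponent is importable from spacy.pipeline on this branch.

    from spacy.pipeline import BaseThincComponent

    class DummyPipe(BaseThincComponent):
        name = 'dummy'

        def __init__(self, vocab, model=True, **cfg):
            self.vocab = vocab
            self.model = model
            self.cfg = dict(cfg)

        def __call__(self, doc):
            # __call__ and pipe() both delegate to predict()/set_annotations().
            scores = self.predict([doc])
            self.set_annotations([doc], scores)
            return doc

        def predict(self, docs):
            return [0.0 for _ in docs]            # placeholder scores

        def set_annotations(self, docs, scores):
            for doc, score in zip(docs, scores):
                doc.user_data['dummy_score'] = score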
spacy/syntax/nn_parser.pyx
@@ -49,6 +49,7 @@ from ..util import get_async, get_cuda_stream
from .._ml import zero_init, PrecomputableAffine, PrecomputableMaxouts
from .._ml import Tok2Vec, doc2feats, rebatch, fine_tune
from .._ml import Residual, drop_layer
from .._ml import link_vectors_to_models
from ..compat import json_dumps

from . import _parse_features

@@ -309,6 +310,7 @@ cdef class Parser:
cfg['beam_density'] = util.env_opt('beam_density', 0.0)
if 'pretrained_dims' not in cfg:
cfg['pretrained_dims'] = self.vocab.vectors.data.shape[1]
cfg.setdefault('cnn_maxout_pieces', 3)
self.cfg = cfg
if 'actions' in self.cfg:
for action, labels in self.cfg.get('actions', {}).items():

@@ -790,6 +792,7 @@ cdef class Parser:
if self.model is True:
cfg['pretrained_dims'] = self.vocab.vectors_length
self.model, cfg = self.Model(self.moves.n_moves, **cfg)
link_vectors_to_models(self.vocab)
self.cfg.update(cfg)

def preprocess_gold(self, docs_golds):

@@ -871,8 +874,7 @@ cdef class Parser:
msg = util.from_bytes(bytes_data, deserializers, exclude)
if 'model' not in exclude:
if self.model is True:
self.model, cfg = self.Model(self.moves.n_moves,
pretrained_dims=self.vocab.vectors_length)
self.model, cfg = self.Model(**self.cfg)
cfg['pretrained_dims'] = self.vocab.vectors_length
else:
cfg = {}
spacy/util.py
@@ -14,6 +14,7 @@ import numpy
import io
import dill
from collections import OrderedDict
from thinc.neural._classes.model import Model

import msgpack
import msgpack_numpy

@@ -557,3 +558,14 @@ def minify_html(html):
RETURNS (unicode): "Minified" HTML.
"""
return html.strip().replace(' ', '').replace('\n', '')

def use_gpu(gpu_id):
import cupy.cuda.device
from thinc.neural.ops import CupyOps
device = cupy.cuda.device.Device(gpu_id)
device.use()
Model.ops = CupyOps()
Model.Ops = CupyOps
return device
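Not part of the commit: how the new util.use_gpu() helper is meant to be called; the sketch assumes CuPy is installed and that CUDA device 0 exists.

    from spacy import util

    # Activates the device and switches thinc's Model.ops to CupyOps.
    device = util.use_gpu(0)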
spacy/vocab.pyx
@@ -27,6 +27,7 @@ from .vectors import Vectors
from . import util
from . import attrs
from . import symbols
from ._ml import link_vectors_to_models

cdef class Vocab:

@@ -323,6 +324,7 @@ cdef class Vocab:
self.lexemes_from_bytes(file_.read())
if self.vectors is not None:
self.vectors.from_disk(path, exclude='strings.json')
link_vectors_to_models(self)
return self

def to_bytes(self, **exclude):

@@ -362,6 +364,7 @@ cdef class Vocab:
('vectors', lambda b: serialize_vectors(b))
))
util.from_bytes(bytes_data, setters, exclude)
link_vectors_to_models(self)
return self

def lexemes_to_bytes(self):

@@ -436,6 +439,7 @@ def unpickle_vocab(sstore, morphology, data_dir,
vocab.lex_attr_getters = lex_attr_getters
vocab.lexemes_from_bytes(lexemes_data)
vocab.length = length
link_vectors_to_models(vocab)
return vocab
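Not part of the commit: the Vocab changes above call link_vectors_to_models() on every deserialisation path so that thinc's StaticVectors layers can find the vectors table. A minimal sketch of the same call on a freshly created Vocab, assuming this branch of spaCy.

    from spacy.vocab import Vocab
    from spacy._ml import link_vectors_to_models

    vocab = Vocab()
    # Registers vocab.vectors under the shared VECTORS_KEY so StaticVectors
    # layers built by Tok2Vec can look it up at runtime.
    link_vectors_to_models(vocab)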