diff --git a/examples/training/train_textcat.py b/examples/training/train_textcat.py
index 852635075..6fa79e75b 100644
--- a/examples/training/train_textcat.py
+++ b/examples/training/train_textcat.py
@@ -26,8 +26,9 @@ from spacy.pipeline import TextCategorizer
 @plac.annotations(
     model=("Model name. Defaults to blank 'en' model.", "option", "m", str),
     output_dir=("Optional output directory", "option", "o", Path),
+    n_texts=("Number of texts to train from", "option", "N", int),
     n_iter=("Number of training iterations", "option", "n", int))
-def main(model=None, output_dir=None, n_iter=20):
+def main(model=None, output_dir=None, n_iter=20, n_texts=2000):
     if model is not None:
         nlp = spacy.load(model)  # load existing spaCy model
         print("Loaded model '%s'" % model)
@@ -50,7 +51,8 @@ def main(model=None, output_dir=None, n_iter=20):
 
     # load the IMBD dataset
     print("Loading IMDB data...")
-    (train_texts, train_cats), (dev_texts, dev_cats) = load_data(limit=2000)
+    print("Using %d training examples" % n_texts)
+    (train_texts, train_cats), (dev_texts, dev_cats) = load_data(limit=n_texts)
     train_docs = [nlp.tokenizer(text) for text in train_texts]
     train_gold = [GoldParse(doc, cats=cats) for doc, cats in
                   zip(train_docs, train_cats)]
@@ -65,14 +67,14 @@ def main(model=None, output_dir=None, n_iter=20):
         for i in range(n_iter):
             losses = {}
             # batch up the examples using spaCy's minibatch
-            batches = minibatch(train_data, size=compounding(4., 128., 1.001))
+            batches = minibatch(train_data, size=compounding(4., 32., 1.001))
             for batch in batches:
                 docs, golds = zip(*batch)
                 nlp.update(docs, golds, sgd=optimizer, drop=0.2, losses=losses)
             with textcat.model.use_params(optimizer.averages):
                 # evaluate on the dev data split off in load_data()
                 scores = evaluate(nlp.tokenizer, textcat, dev_texts, dev_cats)
-            print('{0:.3f}\t{0:.3f}\t{0:.3f}\t{0:.3f}'  # print a simple table
+            print('{0:.3f}\t{1:.3f}\t{2:.3f}\t{3:.3f}'  # print a simple table
                   .format(losses['textcat'], scores['textcat_p'],
                           scores['textcat_r'], scores['textcat_f']))
 
diff --git a/spacy/_ml.py b/spacy/_ml.py
index 6bfacb20a..89e3d8ac6 100644
--- a/spacy/_ml.py
+++ b/spacy/_ml.py
@@ -434,7 +434,7 @@ def build_text_classifier(nr_class, width=64, **cfg):
     pretrained_dims = cfg.get('pretrained_dims', 0)
     with Model.define_operators({'>>': chain, '+': add, '|': concatenate,
                                  '**': clone}):
-        if cfg.get('low_data'):
+        if cfg.get('low_data') and pretrained_dims:
             model = (
                 SpacyVectors
                 >> flatten_add_lengths
diff --git a/spacy/pipeline.pyx b/spacy/pipeline.pyx
index 842e27069..a2321d1ad 100644
--- a/spacy/pipeline.pyx
+++ b/spacy/pipeline.pyx
@@ -11,9 +11,9 @@ import ujson
 import msgpack
 
 from thinc.api import chain
-from thinc.v2v import Softmax
+from thinc.v2v import Affine, Softmax
 from thinc.t2v import Pooling, max_pool, mean_pool
-from thinc.neural.util import to_categorical
+from thinc.neural.util import to_categorical, copy_array
 from thinc.neural._classes.difference import Siamese, CauchySimilarity
 
 from .tokens.doc cimport Doc
@@ -130,6 +130,15 @@ class Pipe(object):
         documents and their predicted scores."""
         raise NotImplementedError
 
+    def add_label(self, label):
+        """Add an output label, to be predicted by the model.
+
+        It's possible to extend pre-trained models with new labels, but
+        take care to avoid the "catastrophic forgetting" problem. This
+        base implementation always raises NotImplementedError.
+        """
+        raise NotImplementedError
+
     def begin_training(self, gold_tuples=tuple(), pipeline=None):
         """Initialize the pipe for training, using data exampes if available.
         If no model has been initialized yet, the model is added."""
@@ -325,6 +334,14 @@ class Tagger(Pipe):
         self.cfg.setdefault('pretrained_dims',
                             self.vocab.vectors.data.shape[1])
 
+    @property
+    def labels(self):  # tag names, kept under cfg['tag_names']
+        return self.cfg.setdefault('tag_names', [])  # [] stored if unset
+
+    @labels.setter
+    def labels(self, value):  # replaces cfg['tag_names'] with value
+        self.cfg['tag_names'] = value
+
     def __call__(self, doc):
         tags = self.predict([doc])
         self.set_annotations([doc], tags)
@@ -352,6 +369,7 @@ class Tagger(Pipe):
         cdef Doc doc
         cdef int idx = 0
         cdef Vocab vocab = self.vocab
+        tags = list(self.labels)
         for i, doc in enumerate(docs):
             doc_tag_ids = batch_tag_ids[i]
             if hasattr(doc_tag_ids, 'get'):
@@ -359,7 +377,7 @@ class Tagger(Pipe):
             for j, tag_id in enumerate(doc_tag_ids):
                 # Don't clobber preset POS tags
                 if doc.c[j].tag == 0 and doc.c[j].pos == 0:
-                    vocab.morphology.assign_tag_id(&doc.c[j], tag_id)
+                    vocab.morphology.assign_tag(&doc.c[j], tags[tag_id])
                 idx += 1
         doc.is_tagged = True
 
@@ -420,6 +438,17 @@ class Tagger(Pipe):
     def Model(cls, n_tags, **cfg):
         return build_tagger_model(n_tags, **cfg)
 
+    def add_label(self, label):  # returns 1 if label was added, else 0
+        if label in self.labels:
+            return 0  # already known: model and labels stay untouched
+        smaller = self.model[-1]._layers[-1]  # current final layer
+        larger = Softmax(len(self.labels)+1, smaller.nI)  # one extra label
+        copy_array(larger.W[:smaller.nO], smaller.W)  # assumes (dst, src)
+        copy_array(larger.b[:smaller.nO], smaller.b)  # same for the bias
+        self.model[-1]._layers[-1] = larger  # swap in the resized layer
+        self.labels.append(label)  # labels returns the list held in cfg
+        return 1
+
     def use_params(self, params):
         with self.model.use_params(params):
             yield
@@ -675,7 +704,7 @@ class TextCategorizer(Pipe):
 
     @property
     def labels(self):
-        return self.cfg.get('labels', ['LABEL'])
+        return self.cfg.setdefault('labels', ['LABEL'])
 
     @labels.setter
     def labels(self, value):
@@ -727,6 +756,17 @@ class TextCategorizer(Pipe):
         mean_square_error = ((scores-truths)**2).sum(axis=1).mean()
         return mean_square_error, d_scores
 
+    def add_label(self, label):  # returns 1 if label was added, else 0
+        if label in self.labels:
+            return 0  # already known: model and labels stay untouched
+        smaller = self.model[-1]._layers[-1]  # current final layer
+        larger = Affine(len(self.labels)+1, smaller.nI)  # one extra label
+        copy_array(larger.W[:smaller.nO], smaller.W)  # assumes (dst, src)
+        copy_array(larger.b[:smaller.nO], smaller.b)  # same for the bias
+        self.model[-1]._layers[-1] = larger  # swap in the resized layer
+        self.labels.append(label)  # labels returns the list held in cfg
+        return 1
+
     def begin_training(self, gold_tuples=tuple(), pipeline=None):
         if pipeline and getattr(pipeline[0], 'name', None) == 'tensorizer':
             token_vector_width = pipeline[0].model.nO