From b30dd361798ab7aa764fa2f75153f4367e4b17fb Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Wed, 1 Nov 2017 21:49:24 +0100
Subject: [PATCH] Allow Tagger.add_label() before training

---
 spacy/pipeline.pyx | 33 +++++++++++++++++++++++----------
 1 file changed, 23 insertions(+), 10 deletions(-)

diff --git a/spacy/pipeline.pyx b/spacy/pipeline.pyx
index be6804c93..40014ce03 100644
--- a/spacy/pipeline.pyx
+++ b/spacy/pipeline.pyx
@@ -431,18 +431,31 @@ class Tagger(Pipe):
     def Model(cls, n_tags, **cfg):
         return build_tagger_model(n_tags, **cfg)
 
-    def add_label(self, label):
+    def add_label(self, label, values=None):
         if label in self.labels:
             return 0
-        raise NotImplementedError
-        #if self.model not in (True, False, None):
-        #    smaller = self.model._layers[-1]
-        #    larger = Softmax(len(self.labels)+1, smaller.nI)
-        #    copy_array(larger.W[:smaller.nO], smaller.W)
-        #    copy_array(larger.b[:smaller.nO], smaller.b)
-        #    self.model._layers[-1] = larger
-        #self.labels.append(label)
-        #return 1
+        if self.model not in (True, False, None):
+            # Resizing an existing model is not implemented yet. The
+            # sketch below shows how it will work once the neuron-to-tag
+            # mapping is no longer controlled by the Morphology class,
+            # whose sorting of tag names makes adding labels difficult.
+            # smaller = self.model._layers[-1]
+            # larger = Softmax(len(self.labels)+1, smaller.nI)
+            # copy_array(larger.W[:smaller.nO], smaller.W)
+            # copy_array(larger.b[:smaller.nO], smaller.b)
+            # self.model._layers[-1] = larger
+            raise ValueError(
+                "Resizing pre-trained Tagger models is not "
+                "currently supported.")
+        tag_map = dict(self.vocab.morphology.tag_map)
+        if values is None:
+            values = {POS: "X"}
+        tag_map[label] = values
+        self.vocab.morphology = Morphology(
+            self.vocab.strings, tag_map=tag_map,
+            lemmatizer=self.vocab.morphology.lemmatizer,
+            exc=self.vocab.morphology.exc)
+        return 1
 
     def use_params(self, params):
         with self.model.use_params(params):