From f1461210922e55f2419ab36b5e7cac051c346cf1 Mon Sep 17 00:00:00 2001
From: Ines Montani <ines@ines.io>
Date: Thu, 14 Feb 2019 20:03:19 +0100
Subject: [PATCH] =?UTF-8?q?=F0=9F=92=AB=20=20Make=20handling=20of=20[Pipe]?=
 =?UTF-8?q?.labels=20consistent=20=20(#3273)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Make handling of [Pipe].labels consistent

* Un-xfail passing test

* Update spacy/pipeline/pipes.pyx

Co-Authored-By: ines <ines@ines.io>

* Update spacy/pipeline/pipes.pyx

Co-Authored-By: ines <ines@ines.io>

* Update spacy/tests/pipeline/test_pipe_methods.py

Co-Authored-By: ines <ines@ines.io>

* Update spacy/pipeline/pipes.pyx

Co-Authored-By: ines <ines@ines.io>

* Move error message to spacy.errors

* Fix textcat labels and test

* Make EntityRuler.labels return tuple as well
---
 spacy/errors.py                               |  4 ++++
 spacy/pipeline/entityruler.py                 |  2 +-
 spacy/pipeline/pipes.pyx                      | 23 ++++++++++---------
 spacy/tests/pipeline/test_pipe_methods.py     | 12 ++++++++++
 spacy/tests/regression/test_issue2001-2500.py |  3 +--
 5 files changed, 30 insertions(+), 14 deletions(-)

diff --git a/spacy/errors.py b/spacy/errors.py
index f689963dd..37deb4560 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -317,6 +317,10 @@ class Errors(object):
     E113 = ("The newly split token can only have one root (head = 0).")
     E114 = ("The newly split token needs to have a root (head = 0)")
     E115 = ("All subtokens must have associated heads")
+    E116 = ("Cannot currently add labels to pre-trained text classifier. Add "
+            "labels before training begins. This functionality was available "
+            "in previous versions, but had significant bugs that led to poor "
+            "performance")
 
 
 @add_codes
diff --git a/spacy/pipeline/entityruler.py b/spacy/pipeline/entityruler.py
index bef929411..6b757fe7c 100644
--- a/spacy/pipeline/entityruler.py
+++ b/spacy/pipeline/entityruler.py
@@ -86,7 +86,7 @@ class EntityRuler(object):
         """
         all_labels = set(self.token_patterns.keys())
         all_labels.update(self.phrase_patterns.keys())
-        return all_labels
+        return tuple(all_labels)
 
     @property
     def patterns(self):
diff --git a/spacy/pipeline/pipes.pyx b/spacy/pipeline/pipes.pyx
index 92fecb8b3..153e053e2 100644
--- a/spacy/pipeline/pipes.pyx
+++ b/spacy/pipeline/pipes.pyx
@@ -358,7 +358,7 @@ class Tagger(Pipe):
 
     @property
     def labels(self):
-        return self.vocab.morphology.tag_names
+        return tuple(self.vocab.morphology.tag_names)
 
     @property
     def tok2vec(self):
@@ -884,11 +884,11 @@ class TextCategorizer(Pipe):
 
     @property
     def labels(self):
-        return self.cfg.setdefault('labels', [])
+        return tuple(self.cfg.setdefault('labels', []))
 
     @labels.setter
     def labels(self, value):
-        self.cfg['labels'] = value
+        self.cfg['labels'] = tuple(value)
 
     def __call__(self, doc):
         scores, tensors = self.predict([doc])
@@ -957,17 +957,13 @@ class TextCategorizer(Pipe):
             # The problem is that we resize the last layer, but the last layer
             # is actually just an ensemble. We're not resizing the child layers
             # -- a huge problem.
-            raise ValueError(
-                "Cannot currently add labels to pre-trained text classifier. "
-                "Add labels before training begins. This functionality was "
-                "available in previous versions, but had significant bugs that "
-                "let to poor performance")
+            raise ValueError(Errors.E116)
             #smaller = self.model._layers[-1]
             #larger = Affine(len(self.labels)+1, smaller.nI)
             #copy_array(larger.W[:smaller.nO], smaller.W)
             #copy_array(larger.b[:smaller.nO], smaller.b)
             #self.model._layers[-1] = larger
-        self.labels.append(label)
+        self.labels = tuple(list(self.labels) + [label])
         return 1
 
     def begin_training(self, get_gold_tuples=lambda: [], pipeline=None, sgd=None,
@@ -1012,6 +1008,11 @@ cdef class DependencyParser(Parser):
         return (DependencyParser, (self.vocab, self.moves, self.model),
                 None, None)
 
+    @property
+    def labels(self):
+        # Get the labels from the model by looking at the available moves
+        return tuple(set(move.split("-")[1] for move in self.move_names))
+
 
 cdef class EntityRecognizer(Parser):
     name = "ner"
@@ -1040,8 +1041,8 @@ cdef class EntityRecognizer(Parser):
     def labels(self):
         # Get the labels from the model by looking at the available moves, e.g.
         # B-PERSON, I-PERSON, L-PERSON, U-PERSON
-        return [move.split("-")[1] for move in self.move_names
-                if move[0] in ("B", "I", "L", "U")]
+        return tuple(set(move.split("-")[1] for move in self.move_names
+                if move[0] in ("B", "I", "L", "U")))
 
 
 __all__ = ['Tagger', 'DependencyParser', 'EntityRecognizer', 'Tensorizer', 'TextCategorizer']
diff --git a/spacy/tests/pipeline/test_pipe_methods.py b/spacy/tests/pipeline/test_pipe_methods.py
index bd779d5c2..d36201718 100644
--- a/spacy/tests/pipeline/test_pipe_methods.py
+++ b/spacy/tests/pipeline/test_pipe_methods.py
@@ -112,3 +112,15 @@ def test_add_lots_of_pipes(nlp, n_pipes):
 def test_raise_for_invalid_components(nlp, component):
     with pytest.raises(ValueError):
         nlp.add_pipe(component)
+
+
+@pytest.mark.parametrize("component", ["ner", "tagger", "parser", "textcat"])
+def test_pipe_base_class_add_label(nlp, component):
+    label = "TEST"
+    pipe = nlp.create_pipe(component)
+    pipe.add_label(label)
+    if component == "tagger":
+        # Tagger always has the default coarse-grained label scheme
+        assert label in pipe.labels
+    else:
+        assert pipe.labels == (label,)
diff --git a/spacy/tests/regression/test_issue2001-2500.py b/spacy/tests/regression/test_issue2001-2500.py
index 70040898f..3922db6d3 100644
--- a/spacy/tests/regression/test_issue2001-2500.py
+++ b/spacy/tests/regression/test_issue2001-2500.py
@@ -11,7 +11,6 @@ import numpy
 from ..util import add_vecs_to_vocab, get_doc
 
 
-@pytest.mark.xfail
 def test_issue2179():
     """Test that spurious 'extra_labels' aren't created when initializing NER."""
     nlp = Italian()
@@ -23,7 +22,7 @@ def test_issue2179():
     nlp2.add_pipe(nlp2.create_pipe("ner"))
     nlp2.from_bytes(nlp.to_bytes())
     assert "extra_labels" not in nlp2.get_pipe("ner").cfg
-    assert nlp2.get_pipe("ner").labels == ["CITIZENSHIP"]
+    assert nlp2.get_pipe("ner").labels == ("CITIZENSHIP",)
 
 
 def test_issue2219(en_vocab):