mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-24 17:06:29 +03:00
💫 Make handling of [Pipe].labels consistent (#3273)
* Make handling of [Pipe].labels consistent
* Un-xfail passing test
* Update spacy/pipeline/pipes.pyx
  Co-Authored-By: ines <ines@ines.io>
* Update spacy/pipeline/pipes.pyx
  Co-Authored-By: ines <ines@ines.io>
* Update spacy/tests/pipeline/test_pipe_methods.py
  Co-Authored-By: ines <ines@ines.io>
* Update spacy/pipeline/pipes.pyx
  Co-Authored-By: ines <ines@ines.io>
* Move error message to spacy.errors
* Fix textcat labels and test
* Make EntityRuler.labels return tuple as well
This commit is contained in:
parent
3d577b77c6
commit
f146121092
|
@ -317,6 +317,10 @@ class Errors(object):
|
||||||
E113 = ("The newly split token can only have one root (head = 0).")
|
E113 = ("The newly split token can only have one root (head = 0).")
|
||||||
E114 = ("The newly split token needs to have a root (head = 0)")
|
E114 = ("The newly split token needs to have a root (head = 0)")
|
||||||
E115 = ("All subtokens must have associated heads")
|
E115 = ("All subtokens must have associated heads")
|
||||||
|
E116 = ("Cannot currently add labels to pre-trained text classifier. Add "
|
||||||
|
"labels before training begins. This functionality was available "
|
||||||
|
"in previous versions, but had significant bugs that led to poor "
|
||||||
|
"performance")
|
||||||
|
|
||||||
|
|
||||||
@add_codes
|
@add_codes
|
||||||
|
|
|
@ -86,7 +86,7 @@ class EntityRuler(object):
|
||||||
"""
|
"""
|
||||||
all_labels = set(self.token_patterns.keys())
|
all_labels = set(self.token_patterns.keys())
|
||||||
all_labels.update(self.phrase_patterns.keys())
|
all_labels.update(self.phrase_patterns.keys())
|
||||||
return all_labels
|
return tuple(all_labels)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def patterns(self):
|
def patterns(self):
|
||||||
|
|
|
@ -358,7 +358,7 @@ class Tagger(Pipe):
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def labels(self):
|
def labels(self):
|
||||||
return self.vocab.morphology.tag_names
|
return tuple(self.vocab.morphology.tag_names)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def tok2vec(self):
|
def tok2vec(self):
|
||||||
|
@ -884,11 +884,11 @@ class TextCategorizer(Pipe):
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def labels(self):
|
def labels(self):
|
||||||
return self.cfg.setdefault('labels', [])
|
return tuple(self.cfg.setdefault('labels', []))
|
||||||
|
|
||||||
@labels.setter
|
@labels.setter
|
||||||
def labels(self, value):
|
def labels(self, value):
|
||||||
self.cfg['labels'] = value
|
self.cfg['labels'] = tuple(value)
|
||||||
|
|
||||||
def __call__(self, doc):
|
def __call__(self, doc):
|
||||||
scores, tensors = self.predict([doc])
|
scores, tensors = self.predict([doc])
|
||||||
|
@ -957,17 +957,13 @@ class TextCategorizer(Pipe):
|
||||||
# The problem is that we resize the last layer, but the last layer
|
# The problem is that we resize the last layer, but the last layer
|
||||||
# is actually just an ensemble. We're not resizing the child layers
|
# is actually just an ensemble. We're not resizing the child layers
|
||||||
# -- a huge problem.
|
# -- a huge problem.
|
||||||
raise ValueError(
|
raise ValueError(Errors.E116)
|
||||||
"Cannot currently add labels to pre-trained text classifier. "
|
|
||||||
"Add labels before training begins. This functionality was "
|
|
||||||
"available in previous versions, but had significant bugs that "
|
|
||||||
"let to poor performance")
|
|
||||||
#smaller = self.model._layers[-1]
|
#smaller = self.model._layers[-1]
|
||||||
#larger = Affine(len(self.labels)+1, smaller.nI)
|
#larger = Affine(len(self.labels)+1, smaller.nI)
|
||||||
#copy_array(larger.W[:smaller.nO], smaller.W)
|
#copy_array(larger.W[:smaller.nO], smaller.W)
|
||||||
#copy_array(larger.b[:smaller.nO], smaller.b)
|
#copy_array(larger.b[:smaller.nO], smaller.b)
|
||||||
#self.model._layers[-1] = larger
|
#self.model._layers[-1] = larger
|
||||||
self.labels.append(label)
|
self.labels = tuple(list(self.labels) + [label])
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
def begin_training(self, get_gold_tuples=lambda: [], pipeline=None, sgd=None,
|
def begin_training(self, get_gold_tuples=lambda: [], pipeline=None, sgd=None,
|
||||||
|
@ -1012,6 +1008,11 @@ cdef class DependencyParser(Parser):
|
||||||
return (DependencyParser, (self.vocab, self.moves, self.model),
|
return (DependencyParser, (self.vocab, self.moves, self.model),
|
||||||
None, None)
|
None, None)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def labels(self):
|
||||||
|
# Get the labels from the model by looking at the available moves
|
||||||
|
return tuple(set(move.split("-")[1] for move in self.move_names))
|
||||||
|
|
||||||
|
|
||||||
cdef class EntityRecognizer(Parser):
|
cdef class EntityRecognizer(Parser):
|
||||||
name = "ner"
|
name = "ner"
|
||||||
|
@ -1040,8 +1041,8 @@ cdef class EntityRecognizer(Parser):
|
||||||
def labels(self):
|
def labels(self):
|
||||||
# Get the labels from the model by looking at the available moves, e.g.
|
# Get the labels from the model by looking at the available moves, e.g.
|
||||||
# B-PERSON, I-PERSON, L-PERSON, U-PERSON
|
# B-PERSON, I-PERSON, L-PERSON, U-PERSON
|
||||||
return [move.split("-")[1] for move in self.move_names
|
return tuple(set(move.split("-")[1] for move in self.move_names
|
||||||
if move[0] in ("B", "I", "L", "U")]
|
if move[0] in ("B", "I", "L", "U")))
|
||||||
|
|
||||||
|
|
||||||
__all__ = ['Tagger', 'DependencyParser', 'EntityRecognizer', 'Tensorizer', 'TextCategorizer']
|
__all__ = ['Tagger', 'DependencyParser', 'EntityRecognizer', 'Tensorizer', 'TextCategorizer']
|
||||||
|
|
|
@ -112,3 +112,15 @@ def test_add_lots_of_pipes(nlp, n_pipes):
|
||||||
def test_raise_for_invalid_components(nlp, component):
|
def test_raise_for_invalid_components(nlp, component):
|
||||||
with pytest.raises(ValueError):
|
with pytest.raises(ValueError):
|
||||||
nlp.add_pipe(component)
|
nlp.add_pipe(component)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("component", ["ner", "tagger", "parser", "textcat"])
|
||||||
|
def test_pipe_base_class_add_label(nlp, component):
|
||||||
|
label = "TEST"
|
||||||
|
pipe = nlp.create_pipe(component)
|
||||||
|
pipe.add_label(label)
|
||||||
|
if component == "tagger":
|
||||||
|
# Tagger always has the default coarse-grained label scheme
|
||||||
|
assert label in pipe.labels
|
||||||
|
else:
|
||||||
|
assert pipe.labels == (label,)
|
||||||
|
|
|
@ -11,7 +11,6 @@ import numpy
|
||||||
from ..util import add_vecs_to_vocab, get_doc
|
from ..util import add_vecs_to_vocab, get_doc
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.xfail
|
|
||||||
def test_issue2179():
|
def test_issue2179():
|
||||||
"""Test that spurious 'extra_labels' aren't created when initializing NER."""
|
"""Test that spurious 'extra_labels' aren't created when initializing NER."""
|
||||||
nlp = Italian()
|
nlp = Italian()
|
||||||
|
@ -23,7 +22,7 @@ def test_issue2179():
|
||||||
nlp2.add_pipe(nlp2.create_pipe("ner"))
|
nlp2.add_pipe(nlp2.create_pipe("ner"))
|
||||||
nlp2.from_bytes(nlp.to_bytes())
|
nlp2.from_bytes(nlp.to_bytes())
|
||||||
assert "extra_labels" not in nlp2.get_pipe("ner").cfg
|
assert "extra_labels" not in nlp2.get_pipe("ner").cfg
|
||||||
assert nlp2.get_pipe("ner").labels == ["CITIZENSHIP"]
|
assert nlp2.get_pipe("ner").labels == ("CITIZENSHIP",)
|
||||||
|
|
||||||
|
|
||||||
def test_issue2219(en_vocab):
|
def test_issue2219(en_vocab):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user