Fix names of pipeline components

NeuralDependencyParser --> DependencyParser
NeuralEntityRecognizer --> EntityRecognizer
TokenVectorEncoder     --> Tensorizer
NeuralTagger           --> Tagger
NeuralLabeller         --> MultitaskObjective
This commit is contained in:
Matthew Honnibal 2017-10-26 12:38:23 +02:00
parent b6b4f1aaf7
commit b0f3ea2200
11 changed files with 35 additions and 112 deletions

View File

@ -18,8 +18,8 @@ from .tagger import Tagger
from .lemmatizer import Lemmatizer
from .syntax.parser import get_templates
from .pipeline import NeuralDependencyParser, TokenVectorEncoder, NeuralTagger
from .pipeline import NeuralEntityRecognizer, SimilarityHook, TextCategorizer
from .pipeline import DependencyParser, Tensorizer, Tagger
from .pipeline import EntityRecognizer, SimilarityHook, TextCategorizer
from .compat import json_dumps, izip, copy_reg
from .scorer import Scorer
@ -75,9 +75,6 @@ class BaseDefaults(object):
infixes = tuple(TOKENIZER_INFIXES)
tag_map = dict(TAG_MAP)
tokenizer_exceptions = {}
parser_features = get_templates('parser')
entity_features = get_templates('ner')
tagger_features = Tagger.feature_templates # TODO -- fix this
stop_words = set()
lemma_rules = {}
lemma_exc = {}
@ -102,9 +99,9 @@ class Language(object):
factories = {
'tokenizer': lambda nlp: nlp.Defaults.create_tokenizer(nlp),
'tensorizer': lambda nlp, **cfg: TokenVectorEncoder(nlp.vocab, **cfg),
'tagger': lambda nlp, **cfg: NeuralTagger(nlp.vocab, **cfg),
'parser': lambda nlp, **cfg: NeuralDependencyParser(nlp.vocab, **cfg),
'ner': lambda nlp, **cfg: NeuralEntityRecognizer(nlp.vocab, **cfg),
'tagger': lambda nlp, **cfg: Tagger(nlp.vocab, **cfg),
'parser': lambda nlp, **cfg: DependencyParser(nlp.vocab, **cfg),
'ner': lambda nlp, **cfg: EntityRecognizer(nlp.vocab, **cfg),
'similarity': lambda nlp, **cfg: SimilarityHook(nlp.vocab, **cfg),
'textcat': lambda nlp, **cfg: TextCategorizer(nlp.vocab, **cfg)
}

View File

@ -1,21 +0,0 @@
from .syntax.parser cimport Parser
#from .syntax.beam_parser cimport BeamParser
from .syntax.ner cimport BiluoPushDown
from .syntax.arc_eager cimport ArcEager
from .tagger cimport Tagger
cdef class EntityRecognizer(Parser):
pass
cdef class DependencyParser(Parser):
pass
#cdef class BeamEntityRecognizer(BeamParser):
# pass
#
#
#cdef class BeamDependencyParser(BeamParser):
# pass

View File

@ -26,11 +26,8 @@ from thinc.neural.util import to_categorical
from thinc.neural._classes.difference import Siamese, CauchySimilarity
from .tokens.doc cimport Doc
from .syntax.parser cimport Parser as LinearParser
from .syntax.nn_parser cimport Parser as NeuralParser
from .syntax.nn_parser cimport Parser
from .syntax import nonproj
from .syntax.parser import get_templates as get_feature_templates
from .syntax.beam_parser cimport BeamParser
from .syntax.ner cimport BiluoPushDown
from .syntax.arc_eager cimport ArcEager
from .tagger import Tagger
@ -217,7 +214,7 @@ def _load_cfg(path):
return {}
class TokenVectorEncoder(BaseThincComponent):
class Tensorizer(BaseThincComponent):
"""Assign position-sensitive vectors to tokens, using a CNN or RNN."""
name = 'tensorizer'
@ -329,7 +326,7 @@ class TokenVectorEncoder(BaseThincComponent):
link_vectors_to_models(self.vocab)
class NeuralTagger(BaseThincComponent):
class Tagger(BaseThincComponent):
name = 'tagger'
def __init__(self, vocab, model=True, **cfg):
self.vocab = vocab
@ -513,7 +510,11 @@ class NeuralTagger(BaseThincComponent):
return self
class NeuralLabeller(NeuralTagger):
class MultitaskObjective(Tagger):
'''Assist training of a parser or tagger, by training a side-objective.
Experimental
'''
name = 'nn_labeller'
def __init__(self, vocab, model=True, target='dep_tag_offset', **cfg):
self.vocab = vocab
@ -532,7 +533,7 @@ class NeuralLabeller(NeuralTagger):
self.make_label = target
else:
raise ValueError(
"NeuralLabeller target should be function or one of "
"MultitaskObjective target should be function or one of "
"['dep', 'tag', 'ent', 'dep_tag_offset', 'ent_tag']")
self.cfg = dict(cfg)
self.cfg.setdefault('cnn_maxout_pieces', 2)
@ -752,45 +753,7 @@ class TextCategorizer(BaseThincComponent):
link_vectors_to_models(self.vocab)
cdef class EntityRecognizer(LinearParser):
"""Annotate named entities on Doc objects."""
TransitionSystem = BiluoPushDown
feature_templates = get_feature_templates('ner')
def add_label(self, label):
LinearParser.add_label(self, label)
if isinstance(label, basestring):
label = self.vocab.strings[label]
cdef class BeamEntityRecognizer(BeamParser):
"""Annotate named entities on Doc objects."""
TransitionSystem = BiluoPushDown
feature_templates = get_feature_templates('ner')
def add_label(self, label):
LinearParser.add_label(self, label)
if isinstance(label, basestring):
label = self.vocab.strings[label]
cdef class DependencyParser(LinearParser):
TransitionSystem = ArcEager
feature_templates = get_feature_templates('basic')
def add_label(self, label):
LinearParser.add_label(self, label)
if isinstance(label, basestring):
label = self.vocab.strings[label]
@property
def postprocesses(self):
return [nonproj.deprojectivize]
cdef class NeuralDependencyParser(NeuralParser):
cdef class DependencyParser(Parser):
name = 'parser'
TransitionSystem = ArcEager
@ -800,17 +763,17 @@ cdef class NeuralDependencyParser(NeuralParser):
def init_multitask_objectives(self, gold_tuples, pipeline, **cfg):
for target in []:
labeller = NeuralLabeller(self.vocab, target=target)
labeller = MultitaskObjective(self.vocab, target=target)
tok2vec = self.model[0]
labeller.begin_training(gold_tuples, pipeline=pipeline, tok2vec=tok2vec)
pipeline.append(labeller)
self._multitasks.append(labeller)
def __reduce__(self):
return (NeuralDependencyParser, (self.vocab, self.moves, self.model), None, None)
return (DependencyParser, (self.vocab, self.moves, self.model), None, None)
cdef class NeuralEntityRecognizer(NeuralParser):
cdef class EntityRecognizer(Parser):
name = 'ner'
TransitionSystem = BiluoPushDown
@ -818,31 +781,14 @@ cdef class NeuralEntityRecognizer(NeuralParser):
def init_multitask_objectives(self, gold_tuples, pipeline, **cfg):
for target in []:
labeller = NeuralLabeller(self.vocab, target=target)
labeller = MultitaskObjective(self.vocab, target=target)
tok2vec = self.model[0]
labeller.begin_training(gold_tuples, pipeline=pipeline, tok2vec=tok2vec)
pipeline.append(labeller)
self._multitasks.append(labeller)
def __reduce__(self):
return (NeuralEntityRecognizer, (self.vocab, self.moves, self.model), None, None)
return (EntityRecognizer, (self.vocab, self.moves, self.model), None, None)
cdef class BeamDependencyParser(BeamParser):
TransitionSystem = ArcEager
feature_templates = get_feature_templates('basic')
def add_label(self, label):
Parser.add_label(self, label)
if isinstance(label, basestring):
label = self.vocab.strings[label]
@property
def postprocesses(self):
return [nonproj.deprojectivize]
__all__ = ['Tagger', 'DependencyParser', 'EntityRecognizer', 'BeamDependencyParser',
'BeamEntityRecognizer', 'TokenVectorEnoder']
__all__ = ['Tagger', 'DependencyParser', 'EntityRecognizer', 'Tensorizer']

View File

@ -10,7 +10,8 @@ import pytest
def test_doc_add_entities_set_ents_iob(en_vocab):
text = ["This", "is", "a", "lion"]
doc = get_doc(en_vocab, text)
ner = EntityRecognizer(en_vocab, features=[(2,), (3,)])
ner = EntityRecognizer(en_vocab)
ner.begin_training([])
ner(doc)
assert len(list(doc.ents)) == 0

View File

@ -9,7 +9,7 @@ from ...attrs import NORM
from ...gold import GoldParse
from ...vocab import Vocab
from ...tokens import Doc
from ...pipeline import NeuralDependencyParser
from ...pipeline import DependencyParser
numpy.random.seed(0)
@ -21,7 +21,7 @@ def vocab():
@pytest.fixture
def parser(vocab):
parser = NeuralDependencyParser(vocab)
parser = DependencyParser(vocab)
parser.cfg['token_vector_width'] = 8
parser.cfg['hidden_width'] = 30
parser.cfg['hist_size'] = 0

View File

@ -6,7 +6,7 @@ import numpy
from ..._ml import chain, Tok2Vec, doc2feats
from ...vocab import Vocab
from ...pipeline import TokenVectorEncoder
from ...pipeline import Tensorizer
from ...syntax.arc_eager import ArcEager
from ...syntax.nn_parser import Parser
from ...tokens.doc import Doc

View File

@ -8,7 +8,7 @@ from ...attrs import NORM
from ...gold import GoldParse
from ...vocab import Vocab
from ...tokens import Doc
from ...pipeline import NeuralDependencyParser
from ...pipeline import DependencyParser
@pytest.fixture
def vocab():
@ -16,7 +16,7 @@ def vocab():
@pytest.fixture
def parser(vocab):
parser = NeuralDependencyParser(vocab)
parser = DependencyParser(vocab)
parser.cfg['token_vector_width'] = 4
parser.cfg['hidden_width'] = 32
#parser.add_label('right')

View File

@ -1,11 +1,11 @@
import pytest
from ...pipeline import NeuralDependencyParser
from ...pipeline import DependencyParser
@pytest.fixture
def parser(en_vocab):
parser = NeuralDependencyParser(en_vocab)
parser = DependencyParser(en_vocab)
parser.add_label('nsubj')
parser.model, cfg = parser.Model(parser.moves.n_moves)
parser.cfg.update(cfg)
@ -14,7 +14,7 @@ def parser(en_vocab):
@pytest.fixture
def blank_parser(en_vocab):
parser = NeuralDependencyParser(en_vocab)
parser = DependencyParser(en_vocab)
return parser

View File

@ -2,8 +2,8 @@
from __future__ import unicode_literals
from ..util import make_tempdir
from ...pipeline import NeuralDependencyParser as DependencyParser
from ...pipeline import NeuralEntityRecognizer as EntityRecognizer
from ...pipeline import DependencyParser
from ...pipeline import EntityRecognizer
import pytest

View File

@ -2,7 +2,7 @@
from __future__ import unicode_literals
from ..util import make_tempdir
from ...pipeline import NeuralTagger as Tagger
from ...pipeline import Tagger
import pytest

View File

@ -2,7 +2,7 @@
from __future__ import unicode_literals
from ..util import make_tempdir
from ...pipeline import TokenVectorEncoder as Tensorizer
from ...pipeline import Tensorizer
import pytest