mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-27 01:34:30 +03:00
Fix names of pipeline components
NeuralDependencyParser --> DependencyParser
NeuralEntityRecognizer --> EntityRecognizer
TokenVectorEncoder --> Tensorizer
NeuralLabeller --> MultitaskObjective
This commit is contained in:
parent
b6b4f1aaf7
commit
b0f3ea2200
|
@ -18,8 +18,8 @@ from .tagger import Tagger
|
|||
from .lemmatizer import Lemmatizer
|
||||
from .syntax.parser import get_templates
|
||||
|
||||
from .pipeline import NeuralDependencyParser, TokenVectorEncoder, NeuralTagger
|
||||
from .pipeline import NeuralEntityRecognizer, SimilarityHook, TextCategorizer
|
||||
from .pipeline import DependencyParser, Tensorizer, Tagger
|
||||
from .pipeline import EntityRecognizer, SimilarityHook, TextCategorizer
|
||||
|
||||
from .compat import json_dumps, izip, copy_reg
|
||||
from .scorer import Scorer
|
||||
|
@ -75,9 +75,6 @@ class BaseDefaults(object):
|
|||
infixes = tuple(TOKENIZER_INFIXES)
|
||||
tag_map = dict(TAG_MAP)
|
||||
tokenizer_exceptions = {}
|
||||
parser_features = get_templates('parser')
|
||||
entity_features = get_templates('ner')
|
||||
tagger_features = Tagger.feature_templates # TODO -- fix this
|
||||
stop_words = set()
|
||||
lemma_rules = {}
|
||||
lemma_exc = {}
|
||||
|
@ -102,9 +99,9 @@ class Language(object):
|
|||
factories = {
|
||||
'tokenizer': lambda nlp: nlp.Defaults.create_tokenizer(nlp),
|
||||
'tensorizer': lambda nlp, **cfg: TokenVectorEncoder(nlp.vocab, **cfg),
|
||||
'tagger': lambda nlp, **cfg: NeuralTagger(nlp.vocab, **cfg),
|
||||
'parser': lambda nlp, **cfg: NeuralDependencyParser(nlp.vocab, **cfg),
|
||||
'ner': lambda nlp, **cfg: NeuralEntityRecognizer(nlp.vocab, **cfg),
|
||||
'tagger': lambda nlp, **cfg: Tagger(nlp.vocab, **cfg),
|
||||
'parser': lambda nlp, **cfg: DependencyParser(nlp.vocab, **cfg),
|
||||
'ner': lambda nlp, **cfg: EntityRecognizer(nlp.vocab, **cfg),
|
||||
'similarity': lambda nlp, **cfg: SimilarityHook(nlp.vocab, **cfg),
|
||||
'textcat': lambda nlp, **cfg: TextCategorizer(nlp.vocab, **cfg)
|
||||
}
|
||||
|
|
|
@ -1,21 +0,0 @@
|
|||
from .syntax.parser cimport Parser
|
||||
#from .syntax.beam_parser cimport BeamParser
|
||||
from .syntax.ner cimport BiluoPushDown
|
||||
from .syntax.arc_eager cimport ArcEager
|
||||
from .tagger cimport Tagger
|
||||
|
||||
|
||||
cdef class EntityRecognizer(Parser):
|
||||
pass
|
||||
|
||||
|
||||
cdef class DependencyParser(Parser):
|
||||
pass
|
||||
|
||||
|
||||
#cdef class BeamEntityRecognizer(BeamParser):
|
||||
# pass
|
||||
#
|
||||
#
|
||||
#cdef class BeamDependencyParser(BeamParser):
|
||||
# pass
|
|
@ -26,11 +26,8 @@ from thinc.neural.util import to_categorical
|
|||
from thinc.neural._classes.difference import Siamese, CauchySimilarity
|
||||
|
||||
from .tokens.doc cimport Doc
|
||||
from .syntax.parser cimport Parser as LinearParser
|
||||
from .syntax.nn_parser cimport Parser as NeuralParser
|
||||
from .syntax.nn_parser cimport Parser
|
||||
from .syntax import nonproj
|
||||
from .syntax.parser import get_templates as get_feature_templates
|
||||
from .syntax.beam_parser cimport BeamParser
|
||||
from .syntax.ner cimport BiluoPushDown
|
||||
from .syntax.arc_eager cimport ArcEager
|
||||
from .tagger import Tagger
|
||||
|
@ -217,7 +214,7 @@ def _load_cfg(path):
|
|||
return {}
|
||||
|
||||
|
||||
class TokenVectorEncoder(BaseThincComponent):
|
||||
class Tensorizer(BaseThincComponent):
|
||||
"""Assign position-sensitive vectors to tokens, using a CNN or RNN."""
|
||||
name = 'tensorizer'
|
||||
|
||||
|
@ -329,7 +326,7 @@ class TokenVectorEncoder(BaseThincComponent):
|
|||
link_vectors_to_models(self.vocab)
|
||||
|
||||
|
||||
class NeuralTagger(BaseThincComponent):
|
||||
class Tagger(BaseThincComponent):
|
||||
name = 'tagger'
|
||||
def __init__(self, vocab, model=True, **cfg):
|
||||
self.vocab = vocab
|
||||
|
@ -513,7 +510,11 @@ class NeuralTagger(BaseThincComponent):
|
|||
return self
|
||||
|
||||
|
||||
class NeuralLabeller(NeuralTagger):
|
||||
class MultitaskObjective(Tagger):
|
||||
'''Assist training of a parser or tagger, by training a side-objective.
|
||||
|
||||
Experimental
|
||||
'''
|
||||
name = 'nn_labeller'
|
||||
def __init__(self, vocab, model=True, target='dep_tag_offset', **cfg):
|
||||
self.vocab = vocab
|
||||
|
@ -532,7 +533,7 @@ class NeuralLabeller(NeuralTagger):
|
|||
self.make_label = target
|
||||
else:
|
||||
raise ValueError(
|
||||
"NeuralLabeller target should be function or one of "
|
||||
"MultitaskObjective target should be function or one of "
|
||||
"['dep', 'tag', 'ent', 'dep_tag_offset', 'ent_tag']")
|
||||
self.cfg = dict(cfg)
|
||||
self.cfg.setdefault('cnn_maxout_pieces', 2)
|
||||
|
@ -752,45 +753,7 @@ class TextCategorizer(BaseThincComponent):
|
|||
link_vectors_to_models(self.vocab)
|
||||
|
||||
|
||||
cdef class EntityRecognizer(LinearParser):
|
||||
"""Annotate named entities on Doc objects."""
|
||||
TransitionSystem = BiluoPushDown
|
||||
|
||||
feature_templates = get_feature_templates('ner')
|
||||
|
||||
def add_label(self, label):
|
||||
LinearParser.add_label(self, label)
|
||||
if isinstance(label, basestring):
|
||||
label = self.vocab.strings[label]
|
||||
|
||||
|
||||
cdef class BeamEntityRecognizer(BeamParser):
|
||||
"""Annotate named entities on Doc objects."""
|
||||
TransitionSystem = BiluoPushDown
|
||||
|
||||
feature_templates = get_feature_templates('ner')
|
||||
|
||||
def add_label(self, label):
|
||||
LinearParser.add_label(self, label)
|
||||
if isinstance(label, basestring):
|
||||
label = self.vocab.strings[label]
|
||||
|
||||
|
||||
cdef class DependencyParser(LinearParser):
|
||||
TransitionSystem = ArcEager
|
||||
feature_templates = get_feature_templates('basic')
|
||||
|
||||
def add_label(self, label):
|
||||
LinearParser.add_label(self, label)
|
||||
if isinstance(label, basestring):
|
||||
label = self.vocab.strings[label]
|
||||
|
||||
@property
|
||||
def postprocesses(self):
|
||||
return [nonproj.deprojectivize]
|
||||
|
||||
|
||||
cdef class NeuralDependencyParser(NeuralParser):
|
||||
cdef class DependencyParser(Parser):
|
||||
name = 'parser'
|
||||
TransitionSystem = ArcEager
|
||||
|
||||
|
@ -800,17 +763,17 @@ cdef class NeuralDependencyParser(NeuralParser):
|
|||
|
||||
def init_multitask_objectives(self, gold_tuples, pipeline, **cfg):
|
||||
for target in []:
|
||||
labeller = NeuralLabeller(self.vocab, target=target)
|
||||
labeller = MultitaskObjective(self.vocab, target=target)
|
||||
tok2vec = self.model[0]
|
||||
labeller.begin_training(gold_tuples, pipeline=pipeline, tok2vec=tok2vec)
|
||||
pipeline.append(labeller)
|
||||
self._multitasks.append(labeller)
|
||||
|
||||
def __reduce__(self):
|
||||
return (NeuralDependencyParser, (self.vocab, self.moves, self.model), None, None)
|
||||
return (DependencyParser, (self.vocab, self.moves, self.model), None, None)
|
||||
|
||||
|
||||
cdef class NeuralEntityRecognizer(NeuralParser):
|
||||
cdef class EntityRecognizer(Parser):
|
||||
name = 'ner'
|
||||
TransitionSystem = BiluoPushDown
|
||||
|
||||
|
@ -818,31 +781,14 @@ cdef class NeuralEntityRecognizer(NeuralParser):
|
|||
|
||||
def init_multitask_objectives(self, gold_tuples, pipeline, **cfg):
|
||||
for target in []:
|
||||
labeller = NeuralLabeller(self.vocab, target=target)
|
||||
labeller = MultitaskObjective(self.vocab, target=target)
|
||||
tok2vec = self.model[0]
|
||||
labeller.begin_training(gold_tuples, pipeline=pipeline, tok2vec=tok2vec)
|
||||
pipeline.append(labeller)
|
||||
self._multitasks.append(labeller)
|
||||
|
||||
def __reduce__(self):
|
||||
return (NeuralEntityRecognizer, (self.vocab, self.moves, self.model), None, None)
|
||||
return (EntityRecognizer, (self.vocab, self.moves, self.model), None, None)
|
||||
|
||||
|
||||
cdef class BeamDependencyParser(BeamParser):
|
||||
TransitionSystem = ArcEager
|
||||
|
||||
feature_templates = get_feature_templates('basic')
|
||||
|
||||
def add_label(self, label):
|
||||
Parser.add_label(self, label)
|
||||
if isinstance(label, basestring):
|
||||
label = self.vocab.strings[label]
|
||||
|
||||
@property
|
||||
def postprocesses(self):
|
||||
return [nonproj.deprojectivize]
|
||||
|
||||
|
||||
|
||||
__all__ = ['Tagger', 'DependencyParser', 'EntityRecognizer', 'BeamDependencyParser',
|
||||
'BeamEntityRecognizer', 'TokenVectorEnoder']
|
||||
__all__ = ['Tagger', 'DependencyParser', 'EntityRecognizer', 'Tensorizer']
|
||||
|
|
|
@ -10,7 +10,8 @@ import pytest
|
|||
def test_doc_add_entities_set_ents_iob(en_vocab):
|
||||
text = ["This", "is", "a", "lion"]
|
||||
doc = get_doc(en_vocab, text)
|
||||
ner = EntityRecognizer(en_vocab, features=[(2,), (3,)])
|
||||
ner = EntityRecognizer(en_vocab)
|
||||
ner.begin_training([])
|
||||
ner(doc)
|
||||
|
||||
assert len(list(doc.ents)) == 0
|
||||
|
|
|
@ -9,7 +9,7 @@ from ...attrs import NORM
|
|||
from ...gold import GoldParse
|
||||
from ...vocab import Vocab
|
||||
from ...tokens import Doc
|
||||
from ...pipeline import NeuralDependencyParser
|
||||
from ...pipeline import DependencyParser
|
||||
|
||||
numpy.random.seed(0)
|
||||
|
||||
|
@ -21,7 +21,7 @@ def vocab():
|
|||
|
||||
@pytest.fixture
|
||||
def parser(vocab):
|
||||
parser = NeuralDependencyParser(vocab)
|
||||
parser = DependencyParser(vocab)
|
||||
parser.cfg['token_vector_width'] = 8
|
||||
parser.cfg['hidden_width'] = 30
|
||||
parser.cfg['hist_size'] = 0
|
||||
|
|
|
@ -6,7 +6,7 @@ import numpy
|
|||
|
||||
from ..._ml import chain, Tok2Vec, doc2feats
|
||||
from ...vocab import Vocab
|
||||
from ...pipeline import TokenVectorEncoder
|
||||
from ...pipeline import Tensorizer
|
||||
from ...syntax.arc_eager import ArcEager
|
||||
from ...syntax.nn_parser import Parser
|
||||
from ...tokens.doc import Doc
|
||||
|
|
|
@ -8,7 +8,7 @@ from ...attrs import NORM
|
|||
from ...gold import GoldParse
|
||||
from ...vocab import Vocab
|
||||
from ...tokens import Doc
|
||||
from ...pipeline import NeuralDependencyParser
|
||||
from ...pipeline import DependencyParser
|
||||
|
||||
@pytest.fixture
|
||||
def vocab():
|
||||
|
@ -16,7 +16,7 @@ def vocab():
|
|||
|
||||
@pytest.fixture
|
||||
def parser(vocab):
|
||||
parser = NeuralDependencyParser(vocab)
|
||||
parser = DependencyParser(vocab)
|
||||
parser.cfg['token_vector_width'] = 4
|
||||
parser.cfg['hidden_width'] = 32
|
||||
#parser.add_label('right')
|
||||
|
|
|
@ -1,11 +1,11 @@
|
|||
import pytest
|
||||
|
||||
from ...pipeline import NeuralDependencyParser
|
||||
from ...pipeline import DependencyParser
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def parser(en_vocab):
|
||||
parser = NeuralDependencyParser(en_vocab)
|
||||
parser = DependencyParser(en_vocab)
|
||||
parser.add_label('nsubj')
|
||||
parser.model, cfg = parser.Model(parser.moves.n_moves)
|
||||
parser.cfg.update(cfg)
|
||||
|
@ -14,7 +14,7 @@ def parser(en_vocab):
|
|||
|
||||
@pytest.fixture
|
||||
def blank_parser(en_vocab):
|
||||
parser = NeuralDependencyParser(en_vocab)
|
||||
parser = DependencyParser(en_vocab)
|
||||
return parser
|
||||
|
||||
|
||||
|
|
|
@ -2,8 +2,8 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
from ..util import make_tempdir
|
||||
from ...pipeline import NeuralDependencyParser as DependencyParser
|
||||
from ...pipeline import NeuralEntityRecognizer as EntityRecognizer
|
||||
from ...pipeline import DependencyParser
|
||||
from ...pipeline import EntityRecognizer
|
||||
|
||||
import pytest
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
from ..util import make_tempdir
|
||||
from ...pipeline import NeuralTagger as Tagger
|
||||
from ...pipeline import Tagger
|
||||
|
||||
import pytest
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
from ..util import make_tempdir
|
||||
from ...pipeline import TokenVectorEncoder as Tensorizer
|
||||
from ...pipeline import Tensorizer
|
||||
|
||||
import pytest
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user