mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 01:46:28 +03:00
Fix names of pipeline components
NeuralDependencyParser --> DependencyParser; NeuralEntityRecognizer --> EntityRecognizer; TokenVectorEncoder --> Tensorizer; NeuralLabeller --> MultitaskObjective
This commit is contained in:
parent
b6b4f1aaf7
commit
b0f3ea2200
|
@ -18,8 +18,8 @@ from .tagger import Tagger
|
||||||
from .lemmatizer import Lemmatizer
|
from .lemmatizer import Lemmatizer
|
||||||
from .syntax.parser import get_templates
|
from .syntax.parser import get_templates
|
||||||
|
|
||||||
from .pipeline import NeuralDependencyParser, TokenVectorEncoder, NeuralTagger
|
from .pipeline import DependencyParser, Tensorizer, Tagger
|
||||||
from .pipeline import NeuralEntityRecognizer, SimilarityHook, TextCategorizer
|
from .pipeline import EntityRecognizer, SimilarityHook, TextCategorizer
|
||||||
|
|
||||||
from .compat import json_dumps, izip, copy_reg
|
from .compat import json_dumps, izip, copy_reg
|
||||||
from .scorer import Scorer
|
from .scorer import Scorer
|
||||||
|
@ -75,9 +75,6 @@ class BaseDefaults(object):
|
||||||
infixes = tuple(TOKENIZER_INFIXES)
|
infixes = tuple(TOKENIZER_INFIXES)
|
||||||
tag_map = dict(TAG_MAP)
|
tag_map = dict(TAG_MAP)
|
||||||
tokenizer_exceptions = {}
|
tokenizer_exceptions = {}
|
||||||
parser_features = get_templates('parser')
|
|
||||||
entity_features = get_templates('ner')
|
|
||||||
tagger_features = Tagger.feature_templates # TODO -- fix this
|
|
||||||
stop_words = set()
|
stop_words = set()
|
||||||
lemma_rules = {}
|
lemma_rules = {}
|
||||||
lemma_exc = {}
|
lemma_exc = {}
|
||||||
|
@ -102,9 +99,9 @@ class Language(object):
|
||||||
factories = {
|
factories = {
|
||||||
'tokenizer': lambda nlp: nlp.Defaults.create_tokenizer(nlp),
|
'tokenizer': lambda nlp: nlp.Defaults.create_tokenizer(nlp),
|
||||||
'tensorizer': lambda nlp, **cfg: TokenVectorEncoder(nlp.vocab, **cfg),
|
'tensorizer': lambda nlp, **cfg: TokenVectorEncoder(nlp.vocab, **cfg),
|
||||||
'tagger': lambda nlp, **cfg: NeuralTagger(nlp.vocab, **cfg),
|
'tagger': lambda nlp, **cfg: Tagger(nlp.vocab, **cfg),
|
||||||
'parser': lambda nlp, **cfg: NeuralDependencyParser(nlp.vocab, **cfg),
|
'parser': lambda nlp, **cfg: DependencyParser(nlp.vocab, **cfg),
|
||||||
'ner': lambda nlp, **cfg: NeuralEntityRecognizer(nlp.vocab, **cfg),
|
'ner': lambda nlp, **cfg: EntityRecognizer(nlp.vocab, **cfg),
|
||||||
'similarity': lambda nlp, **cfg: SimilarityHook(nlp.vocab, **cfg),
|
'similarity': lambda nlp, **cfg: SimilarityHook(nlp.vocab, **cfg),
|
||||||
'textcat': lambda nlp, **cfg: TextCategorizer(nlp.vocab, **cfg)
|
'textcat': lambda nlp, **cfg: TextCategorizer(nlp.vocab, **cfg)
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,21 +0,0 @@
|
||||||
from .syntax.parser cimport Parser
|
|
||||||
#from .syntax.beam_parser cimport BeamParser
|
|
||||||
from .syntax.ner cimport BiluoPushDown
|
|
||||||
from .syntax.arc_eager cimport ArcEager
|
|
||||||
from .tagger cimport Tagger
|
|
||||||
|
|
||||||
|
|
||||||
cdef class EntityRecognizer(Parser):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
cdef class DependencyParser(Parser):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
#cdef class BeamEntityRecognizer(BeamParser):
|
|
||||||
# pass
|
|
||||||
#
|
|
||||||
#
|
|
||||||
#cdef class BeamDependencyParser(BeamParser):
|
|
||||||
# pass
|
|
|
@ -26,11 +26,8 @@ from thinc.neural.util import to_categorical
|
||||||
from thinc.neural._classes.difference import Siamese, CauchySimilarity
|
from thinc.neural._classes.difference import Siamese, CauchySimilarity
|
||||||
|
|
||||||
from .tokens.doc cimport Doc
|
from .tokens.doc cimport Doc
|
||||||
from .syntax.parser cimport Parser as LinearParser
|
from .syntax.nn_parser cimport Parser
|
||||||
from .syntax.nn_parser cimport Parser as NeuralParser
|
|
||||||
from .syntax import nonproj
|
from .syntax import nonproj
|
||||||
from .syntax.parser import get_templates as get_feature_templates
|
|
||||||
from .syntax.beam_parser cimport BeamParser
|
|
||||||
from .syntax.ner cimport BiluoPushDown
|
from .syntax.ner cimport BiluoPushDown
|
||||||
from .syntax.arc_eager cimport ArcEager
|
from .syntax.arc_eager cimport ArcEager
|
||||||
from .tagger import Tagger
|
from .tagger import Tagger
|
||||||
|
@ -217,7 +214,7 @@ def _load_cfg(path):
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
|
||||||
class TokenVectorEncoder(BaseThincComponent):
|
class Tensorizer(BaseThincComponent):
|
||||||
"""Assign position-sensitive vectors to tokens, using a CNN or RNN."""
|
"""Assign position-sensitive vectors to tokens, using a CNN or RNN."""
|
||||||
name = 'tensorizer'
|
name = 'tensorizer'
|
||||||
|
|
||||||
|
@ -329,7 +326,7 @@ class TokenVectorEncoder(BaseThincComponent):
|
||||||
link_vectors_to_models(self.vocab)
|
link_vectors_to_models(self.vocab)
|
||||||
|
|
||||||
|
|
||||||
class NeuralTagger(BaseThincComponent):
|
class Tagger(BaseThincComponent):
|
||||||
name = 'tagger'
|
name = 'tagger'
|
||||||
def __init__(self, vocab, model=True, **cfg):
|
def __init__(self, vocab, model=True, **cfg):
|
||||||
self.vocab = vocab
|
self.vocab = vocab
|
||||||
|
@ -513,7 +510,11 @@ class NeuralTagger(BaseThincComponent):
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
|
||||||
class NeuralLabeller(NeuralTagger):
|
class MultitaskObjective(Tagger):
|
||||||
|
'''Assist training of a parser or tagger, by training a side-objective.
|
||||||
|
|
||||||
|
Experimental
|
||||||
|
'''
|
||||||
name = 'nn_labeller'
|
name = 'nn_labeller'
|
||||||
def __init__(self, vocab, model=True, target='dep_tag_offset', **cfg):
|
def __init__(self, vocab, model=True, target='dep_tag_offset', **cfg):
|
||||||
self.vocab = vocab
|
self.vocab = vocab
|
||||||
|
@ -532,7 +533,7 @@ class NeuralLabeller(NeuralTagger):
|
||||||
self.make_label = target
|
self.make_label = target
|
||||||
else:
|
else:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"NeuralLabeller target should be function or one of "
|
"MultitaskObjective target should be function or one of "
|
||||||
"['dep', 'tag', 'ent', 'dep_tag_offset', 'ent_tag']")
|
"['dep', 'tag', 'ent', 'dep_tag_offset', 'ent_tag']")
|
||||||
self.cfg = dict(cfg)
|
self.cfg = dict(cfg)
|
||||||
self.cfg.setdefault('cnn_maxout_pieces', 2)
|
self.cfg.setdefault('cnn_maxout_pieces', 2)
|
||||||
|
@ -752,45 +753,7 @@ class TextCategorizer(BaseThincComponent):
|
||||||
link_vectors_to_models(self.vocab)
|
link_vectors_to_models(self.vocab)
|
||||||
|
|
||||||
|
|
||||||
cdef class EntityRecognizer(LinearParser):
|
cdef class DependencyParser(Parser):
|
||||||
"""Annotate named entities on Doc objects."""
|
|
||||||
TransitionSystem = BiluoPushDown
|
|
||||||
|
|
||||||
feature_templates = get_feature_templates('ner')
|
|
||||||
|
|
||||||
def add_label(self, label):
|
|
||||||
LinearParser.add_label(self, label)
|
|
||||||
if isinstance(label, basestring):
|
|
||||||
label = self.vocab.strings[label]
|
|
||||||
|
|
||||||
|
|
||||||
cdef class BeamEntityRecognizer(BeamParser):
|
|
||||||
"""Annotate named entities on Doc objects."""
|
|
||||||
TransitionSystem = BiluoPushDown
|
|
||||||
|
|
||||||
feature_templates = get_feature_templates('ner')
|
|
||||||
|
|
||||||
def add_label(self, label):
|
|
||||||
LinearParser.add_label(self, label)
|
|
||||||
if isinstance(label, basestring):
|
|
||||||
label = self.vocab.strings[label]
|
|
||||||
|
|
||||||
|
|
||||||
cdef class DependencyParser(LinearParser):
|
|
||||||
TransitionSystem = ArcEager
|
|
||||||
feature_templates = get_feature_templates('basic')
|
|
||||||
|
|
||||||
def add_label(self, label):
|
|
||||||
LinearParser.add_label(self, label)
|
|
||||||
if isinstance(label, basestring):
|
|
||||||
label = self.vocab.strings[label]
|
|
||||||
|
|
||||||
@property
|
|
||||||
def postprocesses(self):
|
|
||||||
return [nonproj.deprojectivize]
|
|
||||||
|
|
||||||
|
|
||||||
cdef class NeuralDependencyParser(NeuralParser):
|
|
||||||
name = 'parser'
|
name = 'parser'
|
||||||
TransitionSystem = ArcEager
|
TransitionSystem = ArcEager
|
||||||
|
|
||||||
|
@ -800,17 +763,17 @@ cdef class NeuralDependencyParser(NeuralParser):
|
||||||
|
|
||||||
def init_multitask_objectives(self, gold_tuples, pipeline, **cfg):
|
def init_multitask_objectives(self, gold_tuples, pipeline, **cfg):
|
||||||
for target in []:
|
for target in []:
|
||||||
labeller = NeuralLabeller(self.vocab, target=target)
|
labeller = MultitaskObjective(self.vocab, target=target)
|
||||||
tok2vec = self.model[0]
|
tok2vec = self.model[0]
|
||||||
labeller.begin_training(gold_tuples, pipeline=pipeline, tok2vec=tok2vec)
|
labeller.begin_training(gold_tuples, pipeline=pipeline, tok2vec=tok2vec)
|
||||||
pipeline.append(labeller)
|
pipeline.append(labeller)
|
||||||
self._multitasks.append(labeller)
|
self._multitasks.append(labeller)
|
||||||
|
|
||||||
def __reduce__(self):
|
def __reduce__(self):
|
||||||
return (NeuralDependencyParser, (self.vocab, self.moves, self.model), None, None)
|
return (DependencyParser, (self.vocab, self.moves, self.model), None, None)
|
||||||
|
|
||||||
|
|
||||||
cdef class NeuralEntityRecognizer(NeuralParser):
|
cdef class EntityRecognizer(Parser):
|
||||||
name = 'ner'
|
name = 'ner'
|
||||||
TransitionSystem = BiluoPushDown
|
TransitionSystem = BiluoPushDown
|
||||||
|
|
||||||
|
@ -818,31 +781,14 @@ cdef class NeuralEntityRecognizer(NeuralParser):
|
||||||
|
|
||||||
def init_multitask_objectives(self, gold_tuples, pipeline, **cfg):
|
def init_multitask_objectives(self, gold_tuples, pipeline, **cfg):
|
||||||
for target in []:
|
for target in []:
|
||||||
labeller = NeuralLabeller(self.vocab, target=target)
|
labeller = MultitaskObjective(self.vocab, target=target)
|
||||||
tok2vec = self.model[0]
|
tok2vec = self.model[0]
|
||||||
labeller.begin_training(gold_tuples, pipeline=pipeline, tok2vec=tok2vec)
|
labeller.begin_training(gold_tuples, pipeline=pipeline, tok2vec=tok2vec)
|
||||||
pipeline.append(labeller)
|
pipeline.append(labeller)
|
||||||
self._multitasks.append(labeller)
|
self._multitasks.append(labeller)
|
||||||
|
|
||||||
def __reduce__(self):
|
def __reduce__(self):
|
||||||
return (NeuralEntityRecognizer, (self.vocab, self.moves, self.model), None, None)
|
return (EntityRecognizer, (self.vocab, self.moves, self.model), None, None)
|
||||||
|
|
||||||
|
|
||||||
cdef class BeamDependencyParser(BeamParser):
|
__all__ = ['Tagger', 'DependencyParser', 'EntityRecognizer', 'Tensorizer']
|
||||||
TransitionSystem = ArcEager
|
|
||||||
|
|
||||||
feature_templates = get_feature_templates('basic')
|
|
||||||
|
|
||||||
def add_label(self, label):
|
|
||||||
Parser.add_label(self, label)
|
|
||||||
if isinstance(label, basestring):
|
|
||||||
label = self.vocab.strings[label]
|
|
||||||
|
|
||||||
@property
|
|
||||||
def postprocesses(self):
|
|
||||||
return [nonproj.deprojectivize]
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
__all__ = ['Tagger', 'DependencyParser', 'EntityRecognizer', 'BeamDependencyParser',
|
|
||||||
'BeamEntityRecognizer', 'TokenVectorEnoder']
|
|
||||||
|
|
|
@ -10,7 +10,8 @@ import pytest
|
||||||
def test_doc_add_entities_set_ents_iob(en_vocab):
|
def test_doc_add_entities_set_ents_iob(en_vocab):
|
||||||
text = ["This", "is", "a", "lion"]
|
text = ["This", "is", "a", "lion"]
|
||||||
doc = get_doc(en_vocab, text)
|
doc = get_doc(en_vocab, text)
|
||||||
ner = EntityRecognizer(en_vocab, features=[(2,), (3,)])
|
ner = EntityRecognizer(en_vocab)
|
||||||
|
ner.begin_training([])
|
||||||
ner(doc)
|
ner(doc)
|
||||||
|
|
||||||
assert len(list(doc.ents)) == 0
|
assert len(list(doc.ents)) == 0
|
||||||
|
|
|
@ -9,7 +9,7 @@ from ...attrs import NORM
|
||||||
from ...gold import GoldParse
|
from ...gold import GoldParse
|
||||||
from ...vocab import Vocab
|
from ...vocab import Vocab
|
||||||
from ...tokens import Doc
|
from ...tokens import Doc
|
||||||
from ...pipeline import NeuralDependencyParser
|
from ...pipeline import DependencyParser
|
||||||
|
|
||||||
numpy.random.seed(0)
|
numpy.random.seed(0)
|
||||||
|
|
||||||
|
@ -21,7 +21,7 @@ def vocab():
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def parser(vocab):
|
def parser(vocab):
|
||||||
parser = NeuralDependencyParser(vocab)
|
parser = DependencyParser(vocab)
|
||||||
parser.cfg['token_vector_width'] = 8
|
parser.cfg['token_vector_width'] = 8
|
||||||
parser.cfg['hidden_width'] = 30
|
parser.cfg['hidden_width'] = 30
|
||||||
parser.cfg['hist_size'] = 0
|
parser.cfg['hist_size'] = 0
|
||||||
|
|
|
@ -6,7 +6,7 @@ import numpy
|
||||||
|
|
||||||
from ..._ml import chain, Tok2Vec, doc2feats
|
from ..._ml import chain, Tok2Vec, doc2feats
|
||||||
from ...vocab import Vocab
|
from ...vocab import Vocab
|
||||||
from ...pipeline import TokenVectorEncoder
|
from ...pipeline import Tensorizer
|
||||||
from ...syntax.arc_eager import ArcEager
|
from ...syntax.arc_eager import ArcEager
|
||||||
from ...syntax.nn_parser import Parser
|
from ...syntax.nn_parser import Parser
|
||||||
from ...tokens.doc import Doc
|
from ...tokens.doc import Doc
|
||||||
|
|
|
@ -8,7 +8,7 @@ from ...attrs import NORM
|
||||||
from ...gold import GoldParse
|
from ...gold import GoldParse
|
||||||
from ...vocab import Vocab
|
from ...vocab import Vocab
|
||||||
from ...tokens import Doc
|
from ...tokens import Doc
|
||||||
from ...pipeline import NeuralDependencyParser
|
from ...pipeline import DependencyParser
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def vocab():
|
def vocab():
|
||||||
|
@ -16,7 +16,7 @@ def vocab():
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def parser(vocab):
|
def parser(vocab):
|
||||||
parser = NeuralDependencyParser(vocab)
|
parser = DependencyParser(vocab)
|
||||||
parser.cfg['token_vector_width'] = 4
|
parser.cfg['token_vector_width'] = 4
|
||||||
parser.cfg['hidden_width'] = 32
|
parser.cfg['hidden_width'] = 32
|
||||||
#parser.add_label('right')
|
#parser.add_label('right')
|
||||||
|
|
|
@ -1,11 +1,11 @@
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from ...pipeline import NeuralDependencyParser
|
from ...pipeline import DependencyParser
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def parser(en_vocab):
|
def parser(en_vocab):
|
||||||
parser = NeuralDependencyParser(en_vocab)
|
parser = DependencyParser(en_vocab)
|
||||||
parser.add_label('nsubj')
|
parser.add_label('nsubj')
|
||||||
parser.model, cfg = parser.Model(parser.moves.n_moves)
|
parser.model, cfg = parser.Model(parser.moves.n_moves)
|
||||||
parser.cfg.update(cfg)
|
parser.cfg.update(cfg)
|
||||||
|
@ -14,7 +14,7 @@ def parser(en_vocab):
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def blank_parser(en_vocab):
|
def blank_parser(en_vocab):
|
||||||
parser = NeuralDependencyParser(en_vocab)
|
parser = DependencyParser(en_vocab)
|
||||||
return parser
|
return parser
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -2,8 +2,8 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from ..util import make_tempdir
|
from ..util import make_tempdir
|
||||||
from ...pipeline import NeuralDependencyParser as DependencyParser
|
from ...pipeline import DependencyParser
|
||||||
from ...pipeline import NeuralEntityRecognizer as EntityRecognizer
|
from ...pipeline import EntityRecognizer
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from ..util import make_tempdir
|
from ..util import make_tempdir
|
||||||
from ...pipeline import NeuralTagger as Tagger
|
from ...pipeline import Tagger
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from ..util import make_tempdir
|
from ..util import make_tempdir
|
||||||
from ...pipeline import TokenVectorEncoder as Tensorizer
|
from ...pipeline import Tensorizer
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user