mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 09:57:26 +03:00 
			
		
		
		
	Fix names of pipeline components
NeuralDependencyParser --> DependencyParser; NeuralEntityRecognizer --> EntityRecognizer; TokenVectorEncoder --> Tensorizer; NeuralLabeller --> MultitaskObjective
This commit is contained in:
		
							parent
							
								
									b6b4f1aaf7
								
							
						
					
					
						commit
						b0f3ea2200
					
				| 
						 | 
				
			
			@ -18,8 +18,8 @@ from .tagger import Tagger
 | 
			
		|||
from .lemmatizer import Lemmatizer
 | 
			
		||||
from .syntax.parser import get_templates
 | 
			
		||||
 | 
			
		||||
from .pipeline import NeuralDependencyParser, TokenVectorEncoder, NeuralTagger
 | 
			
		||||
from .pipeline import NeuralEntityRecognizer, SimilarityHook, TextCategorizer
 | 
			
		||||
from .pipeline import DependencyParser, Tensorizer, Tagger
 | 
			
		||||
from .pipeline import EntityRecognizer, SimilarityHook, TextCategorizer
 | 
			
		||||
 | 
			
		||||
from .compat import json_dumps, izip, copy_reg
 | 
			
		||||
from .scorer import Scorer
 | 
			
		||||
| 
						 | 
				
			
			@ -75,9 +75,6 @@ class BaseDefaults(object):
 | 
			
		|||
    infixes = tuple(TOKENIZER_INFIXES)
 | 
			
		||||
    tag_map = dict(TAG_MAP)
 | 
			
		||||
    tokenizer_exceptions = {}
 | 
			
		||||
    parser_features = get_templates('parser')
 | 
			
		||||
    entity_features = get_templates('ner')
 | 
			
		||||
    tagger_features = Tagger.feature_templates # TODO -- fix this
 | 
			
		||||
    stop_words = set()
 | 
			
		||||
    lemma_rules = {}
 | 
			
		||||
    lemma_exc = {}
 | 
			
		||||
| 
						 | 
				
			
			@ -102,9 +99,9 @@ class Language(object):
 | 
			
		|||
    factories = {
 | 
			
		||||
        'tokenizer': lambda nlp: nlp.Defaults.create_tokenizer(nlp),
 | 
			
		||||
        'tensorizer': lambda nlp, **cfg: TokenVectorEncoder(nlp.vocab, **cfg),
 | 
			
		||||
        'tagger': lambda nlp, **cfg: NeuralTagger(nlp.vocab, **cfg),
 | 
			
		||||
        'parser': lambda nlp, **cfg: NeuralDependencyParser(nlp.vocab, **cfg),
 | 
			
		||||
        'ner': lambda nlp, **cfg: NeuralEntityRecognizer(nlp.vocab, **cfg),
 | 
			
		||||
        'tagger': lambda nlp, **cfg: Tagger(nlp.vocab, **cfg),
 | 
			
		||||
        'parser': lambda nlp, **cfg: DependencyParser(nlp.vocab, **cfg),
 | 
			
		||||
        'ner': lambda nlp, **cfg: EntityRecognizer(nlp.vocab, **cfg),
 | 
			
		||||
        'similarity': lambda nlp, **cfg: SimilarityHook(nlp.vocab, **cfg),
 | 
			
		||||
        'textcat': lambda nlp, **cfg: TextCategorizer(nlp.vocab, **cfg)
 | 
			
		||||
    }
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,21 +0,0 @@
 | 
			
		|||
from .syntax.parser cimport Parser
 | 
			
		||||
#from .syntax.beam_parser cimport BeamParser
 | 
			
		||||
from .syntax.ner cimport BiluoPushDown
 | 
			
		||||
from .syntax.arc_eager cimport ArcEager
 | 
			
		||||
from .tagger cimport Tagger
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
cdef class EntityRecognizer(Parser):
 | 
			
		||||
    pass
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
cdef class DependencyParser(Parser):
 | 
			
		||||
    pass
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#cdef class BeamEntityRecognizer(BeamParser):
 | 
			
		||||
#    pass
 | 
			
		||||
#
 | 
			
		||||
#
 | 
			
		||||
#cdef class BeamDependencyParser(BeamParser):
 | 
			
		||||
#    pass
 | 
			
		||||
| 
						 | 
				
			
			@ -26,11 +26,8 @@ from thinc.neural.util import to_categorical
 | 
			
		|||
from thinc.neural._classes.difference import Siamese, CauchySimilarity
 | 
			
		||||
 | 
			
		||||
from .tokens.doc cimport Doc
 | 
			
		||||
from .syntax.parser cimport Parser as LinearParser
 | 
			
		||||
from .syntax.nn_parser cimport Parser as NeuralParser
 | 
			
		||||
from .syntax.nn_parser cimport Parser
 | 
			
		||||
from .syntax import nonproj
 | 
			
		||||
from .syntax.parser import get_templates as get_feature_templates
 | 
			
		||||
from .syntax.beam_parser cimport BeamParser
 | 
			
		||||
from .syntax.ner cimport BiluoPushDown
 | 
			
		||||
from .syntax.arc_eager cimport ArcEager
 | 
			
		||||
from .tagger import Tagger
 | 
			
		||||
| 
						 | 
				
			
			@ -217,7 +214,7 @@ def _load_cfg(path):
 | 
			
		|||
        return {}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class TokenVectorEncoder(BaseThincComponent):
 | 
			
		||||
class Tensorizer(BaseThincComponent):
 | 
			
		||||
    """Assign position-sensitive vectors to tokens, using a CNN or RNN."""
 | 
			
		||||
    name = 'tensorizer'
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -329,7 +326,7 @@ class TokenVectorEncoder(BaseThincComponent):
 | 
			
		|||
        link_vectors_to_models(self.vocab)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class NeuralTagger(BaseThincComponent):
 | 
			
		||||
class Tagger(BaseThincComponent):
 | 
			
		||||
    name = 'tagger'
 | 
			
		||||
    def __init__(self, vocab, model=True, **cfg):
 | 
			
		||||
        self.vocab = vocab
 | 
			
		||||
| 
						 | 
				
			
			@ -513,7 +510,11 @@ class NeuralTagger(BaseThincComponent):
 | 
			
		|||
        return self
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class NeuralLabeller(NeuralTagger):
 | 
			
		||||
class MultitaskObjective(Tagger):
 | 
			
		||||
    '''Assist training of a parser or tagger, by training a side-objective.
 | 
			
		||||
 | 
			
		||||
    Experimental
 | 
			
		||||
    '''
 | 
			
		||||
    name = 'nn_labeller'
 | 
			
		||||
    def __init__(self, vocab, model=True, target='dep_tag_offset', **cfg):
 | 
			
		||||
        self.vocab = vocab
 | 
			
		||||
| 
						 | 
				
			
			@ -532,7 +533,7 @@ class NeuralLabeller(NeuralTagger):
 | 
			
		|||
            self.make_label = target
 | 
			
		||||
        else:
 | 
			
		||||
            raise ValueError(
 | 
			
		||||
                "NeuralLabeller target should be function or one of "
 | 
			
		||||
                "MultitaskObjective target should be function or one of "
 | 
			
		||||
                "['dep', 'tag', 'ent', 'dep_tag_offset', 'ent_tag']")
 | 
			
		||||
        self.cfg = dict(cfg)
 | 
			
		||||
        self.cfg.setdefault('cnn_maxout_pieces', 2)
 | 
			
		||||
| 
						 | 
				
			
			@ -752,45 +753,7 @@ class TextCategorizer(BaseThincComponent):
 | 
			
		|||
            link_vectors_to_models(self.vocab)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
cdef class EntityRecognizer(LinearParser):
 | 
			
		||||
    """Annotate named entities on Doc objects."""
 | 
			
		||||
    TransitionSystem = BiluoPushDown
 | 
			
		||||
 | 
			
		||||
    feature_templates = get_feature_templates('ner')
 | 
			
		||||
 | 
			
		||||
    def add_label(self, label):
 | 
			
		||||
        LinearParser.add_label(self, label)
 | 
			
		||||
        if isinstance(label, basestring):
 | 
			
		||||
            label = self.vocab.strings[label]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
cdef class BeamEntityRecognizer(BeamParser):
 | 
			
		||||
    """Annotate named entities on Doc objects."""
 | 
			
		||||
    TransitionSystem = BiluoPushDown
 | 
			
		||||
 | 
			
		||||
    feature_templates = get_feature_templates('ner')
 | 
			
		||||
 | 
			
		||||
    def add_label(self, label):
 | 
			
		||||
        LinearParser.add_label(self, label)
 | 
			
		||||
        if isinstance(label, basestring):
 | 
			
		||||
            label = self.vocab.strings[label]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
cdef class DependencyParser(LinearParser):
 | 
			
		||||
    TransitionSystem = ArcEager
 | 
			
		||||
    feature_templates = get_feature_templates('basic')
 | 
			
		||||
 | 
			
		||||
    def add_label(self, label):
 | 
			
		||||
        LinearParser.add_label(self, label)
 | 
			
		||||
        if isinstance(label, basestring):
 | 
			
		||||
            label = self.vocab.strings[label]
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def postprocesses(self):
 | 
			
		||||
        return [nonproj.deprojectivize]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
cdef class NeuralDependencyParser(NeuralParser):
 | 
			
		||||
cdef class DependencyParser(Parser):
 | 
			
		||||
    name = 'parser'
 | 
			
		||||
    TransitionSystem = ArcEager
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -800,17 +763,17 @@ cdef class NeuralDependencyParser(NeuralParser):
 | 
			
		|||
 | 
			
		||||
    def init_multitask_objectives(self, gold_tuples, pipeline, **cfg):
 | 
			
		||||
        for target in []:
 | 
			
		||||
            labeller = NeuralLabeller(self.vocab, target=target)
 | 
			
		||||
            labeller = MultitaskObjective(self.vocab, target=target)
 | 
			
		||||
            tok2vec = self.model[0]
 | 
			
		||||
            labeller.begin_training(gold_tuples, pipeline=pipeline, tok2vec=tok2vec)
 | 
			
		||||
            pipeline.append(labeller)
 | 
			
		||||
            self._multitasks.append(labeller)
 | 
			
		||||
 | 
			
		||||
    def __reduce__(self):
 | 
			
		||||
        return (NeuralDependencyParser, (self.vocab, self.moves, self.model), None, None)
 | 
			
		||||
        return (DependencyParser, (self.vocab, self.moves, self.model), None, None)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
cdef class NeuralEntityRecognizer(NeuralParser):
 | 
			
		||||
cdef class EntityRecognizer(Parser):
 | 
			
		||||
    name = 'ner'
 | 
			
		||||
    TransitionSystem = BiluoPushDown
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -818,31 +781,14 @@ cdef class NeuralEntityRecognizer(NeuralParser):
 | 
			
		|||
 | 
			
		||||
    def init_multitask_objectives(self, gold_tuples, pipeline, **cfg):
 | 
			
		||||
        for target in []:
 | 
			
		||||
            labeller = NeuralLabeller(self.vocab, target=target)
 | 
			
		||||
            labeller = MultitaskObjective(self.vocab, target=target)
 | 
			
		||||
            tok2vec = self.model[0]
 | 
			
		||||
            labeller.begin_training(gold_tuples, pipeline=pipeline, tok2vec=tok2vec)
 | 
			
		||||
            pipeline.append(labeller)
 | 
			
		||||
            self._multitasks.append(labeller)
 | 
			
		||||
 | 
			
		||||
    def __reduce__(self):
 | 
			
		||||
        return (NeuralEntityRecognizer, (self.vocab, self.moves, self.model), None, None)
 | 
			
		||||
        return (EntityRecognizer, (self.vocab, self.moves, self.model), None, None)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
cdef class BeamDependencyParser(BeamParser):
 | 
			
		||||
    TransitionSystem = ArcEager
 | 
			
		||||
 | 
			
		||||
    feature_templates = get_feature_templates('basic')
 | 
			
		||||
 | 
			
		||||
    def add_label(self, label):
 | 
			
		||||
        Parser.add_label(self, label)
 | 
			
		||||
        if isinstance(label, basestring):
 | 
			
		||||
            label = self.vocab.strings[label]
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def postprocesses(self):
 | 
			
		||||
        return [nonproj.deprojectivize]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
__all__ = ['Tagger', 'DependencyParser', 'EntityRecognizer', 'BeamDependencyParser',
 | 
			
		||||
           'BeamEntityRecognizer', 'TokenVectorEnoder']
 | 
			
		||||
__all__ = ['Tagger', 'DependencyParser', 'EntityRecognizer', 'Tensorizer']
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -10,7 +10,8 @@ import pytest
 | 
			
		|||
def test_doc_add_entities_set_ents_iob(en_vocab):
 | 
			
		||||
    text = ["This", "is", "a", "lion"]
 | 
			
		||||
    doc = get_doc(en_vocab, text)
 | 
			
		||||
    ner = EntityRecognizer(en_vocab, features=[(2,), (3,)])
 | 
			
		||||
    ner = EntityRecognizer(en_vocab)
 | 
			
		||||
    ner.begin_training([])
 | 
			
		||||
    ner(doc)
 | 
			
		||||
 | 
			
		||||
    assert len(list(doc.ents)) == 0
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -9,7 +9,7 @@ from ...attrs import NORM
 | 
			
		|||
from ...gold import GoldParse
 | 
			
		||||
from ...vocab import Vocab
 | 
			
		||||
from ...tokens import Doc
 | 
			
		||||
from ...pipeline import NeuralDependencyParser
 | 
			
		||||
from ...pipeline import DependencyParser
 | 
			
		||||
 | 
			
		||||
numpy.random.seed(0)
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -21,7 +21,7 @@ def vocab():
 | 
			
		|||
 | 
			
		||||
@pytest.fixture
 | 
			
		||||
def parser(vocab):
 | 
			
		||||
    parser = NeuralDependencyParser(vocab)
 | 
			
		||||
    parser = DependencyParser(vocab)
 | 
			
		||||
    parser.cfg['token_vector_width'] = 8
 | 
			
		||||
    parser.cfg['hidden_width'] = 30
 | 
			
		||||
    parser.cfg['hist_size'] = 0
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -6,7 +6,7 @@ import numpy
 | 
			
		|||
 | 
			
		||||
from ..._ml import chain, Tok2Vec, doc2feats
 | 
			
		||||
from ...vocab import Vocab
 | 
			
		||||
from ...pipeline import TokenVectorEncoder
 | 
			
		||||
from ...pipeline import Tensorizer
 | 
			
		||||
from ...syntax.arc_eager import ArcEager
 | 
			
		||||
from ...syntax.nn_parser import Parser
 | 
			
		||||
from ...tokens.doc import Doc
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -8,7 +8,7 @@ from ...attrs import NORM
 | 
			
		|||
from ...gold import GoldParse
 | 
			
		||||
from ...vocab import Vocab
 | 
			
		||||
from ...tokens import Doc
 | 
			
		||||
from ...pipeline import NeuralDependencyParser
 | 
			
		||||
from ...pipeline import DependencyParser
 | 
			
		||||
 | 
			
		||||
@pytest.fixture
 | 
			
		||||
def vocab():
 | 
			
		||||
| 
						 | 
				
			
			@ -16,7 +16,7 @@ def vocab():
 | 
			
		|||
 | 
			
		||||
@pytest.fixture
 | 
			
		||||
def parser(vocab):
 | 
			
		||||
    parser = NeuralDependencyParser(vocab)
 | 
			
		||||
    parser = DependencyParser(vocab)
 | 
			
		||||
    parser.cfg['token_vector_width'] = 4
 | 
			
		||||
    parser.cfg['hidden_width'] = 32
 | 
			
		||||
    #parser.add_label('right')
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,11 +1,11 @@
 | 
			
		|||
import pytest
 | 
			
		||||
 | 
			
		||||
from ...pipeline import NeuralDependencyParser
 | 
			
		||||
from ...pipeline import DependencyParser
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.fixture
 | 
			
		||||
def parser(en_vocab):
 | 
			
		||||
    parser = NeuralDependencyParser(en_vocab)
 | 
			
		||||
    parser = DependencyParser(en_vocab)
 | 
			
		||||
    parser.add_label('nsubj')
 | 
			
		||||
    parser.model, cfg = parser.Model(parser.moves.n_moves)
 | 
			
		||||
    parser.cfg.update(cfg)
 | 
			
		||||
| 
						 | 
				
			
			@ -14,7 +14,7 @@ def parser(en_vocab):
 | 
			
		|||
 | 
			
		||||
@pytest.fixture
 | 
			
		||||
def blank_parser(en_vocab):
 | 
			
		||||
    parser = NeuralDependencyParser(en_vocab)
 | 
			
		||||
    parser = DependencyParser(en_vocab)
 | 
			
		||||
    return parser
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -2,8 +2,8 @@
 | 
			
		|||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
from ..util import make_tempdir
 | 
			
		||||
from ...pipeline import NeuralDependencyParser as DependencyParser
 | 
			
		||||
from ...pipeline import NeuralEntityRecognizer as EntityRecognizer
 | 
			
		||||
from ...pipeline import DependencyParser
 | 
			
		||||
from ...pipeline import EntityRecognizer
 | 
			
		||||
 | 
			
		||||
import pytest
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -2,7 +2,7 @@
 | 
			
		|||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
from ..util import make_tempdir
 | 
			
		||||
from ...pipeline import NeuralTagger as Tagger
 | 
			
		||||
from ...pipeline import Tagger
 | 
			
		||||
 | 
			
		||||
import pytest
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -2,7 +2,7 @@
 | 
			
		|||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
from ..util import make_tempdir
 | 
			
		||||
from ...pipeline import TokenVectorEncoder as Tensorizer
 | 
			
		||||
from ...pipeline import Tensorizer
 | 
			
		||||
 | 
			
		||||
import pytest
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue
	
	Block a user