mirror of
https://github.com/explosion/spaCy.git
synced 2025-07-12 17:22:25 +03:00
Move registrations
This commit is contained in:
parent
ab5f1c1013
commit
c8d7dd968a
|
@ -32,7 +32,6 @@ split_mode = null
|
|||
"""
|
||||
|
||||
|
||||
@registry.tokenizers("spacy.ja.JapaneseTokenizer")
|
||||
def create_tokenizer(split_mode: Optional[str] = None):
|
||||
def japanese_tokenizer_factory(nlp):
|
||||
return JapaneseTokenizer(nlp.vocab, split_mode=split_mode)
|
||||
|
|
|
@ -20,7 +20,6 @@ DEFAULT_CONFIG = """
|
|||
"""
|
||||
|
||||
|
||||
@registry.tokenizers("spacy.ko.KoreanTokenizer")
|
||||
def create_tokenizer():
|
||||
def korean_tokenizer_factory(nlp):
|
||||
return KoreanTokenizer(nlp.vocab)
|
||||
|
|
|
@ -13,7 +13,6 @@ DEFAULT_CONFIG = """
|
|||
"""
|
||||
|
||||
|
||||
@registry.tokenizers("spacy.th.ThaiTokenizer")
|
||||
def create_thai_tokenizer():
|
||||
def thai_tokenizer_factory(nlp):
|
||||
return ThaiTokenizer(nlp.vocab)
|
||||
|
|
|
@ -22,7 +22,6 @@ use_pyvi = true
|
|||
"""
|
||||
|
||||
|
||||
@registry.tokenizers("spacy.vi.VietnameseTokenizer")
|
||||
def create_vietnamese_tokenizer(use_pyvi: bool = True):
|
||||
def vietnamese_tokenizer_factory(nlp):
|
||||
return VietnameseTokenizer(nlp.vocab, use_pyvi=use_pyvi)
|
||||
|
|
|
@ -46,7 +46,6 @@ class Segmenter(str, Enum):
|
|||
return list(cls.__members__.keys())
|
||||
|
||||
|
||||
@registry.tokenizers("spacy.zh.ChineseTokenizer")
|
||||
def create_chinese_tokenizer(segmenter: Segmenter = Segmenter.char):
|
||||
def chinese_tokenizer_factory(nlp):
|
||||
return ChineseTokenizer(nlp.vocab, segmenter=segmenter)
|
||||
|
|
|
@ -104,7 +104,6 @@ class BaseDefaults:
|
|||
writing_system = {"direction": "ltr", "has_case": True, "has_letters": True}
|
||||
|
||||
|
||||
@registry.tokenizers("spacy.Tokenizer.v1")
|
||||
def create_tokenizer() -> Callable[["Language"], Tokenizer]:
|
||||
"""Registered function to create a tokenizer. Returns a factory that takes
|
||||
the nlp object and returns a Tokenizer instance using the language defaults.
|
||||
|
@ -130,7 +129,6 @@ def create_tokenizer() -> Callable[["Language"], Tokenizer]:
|
|||
return tokenizer_factory
|
||||
|
||||
|
||||
@registry.misc("spacy.LookupsDataLoader.v1")
|
||||
def load_lookups_data(lang, tables):
|
||||
util.logger.debug("Loading lookups from spacy-lookups-data: %s", tables)
|
||||
lookups = load_lookups(lang=lang, tables=tables)
|
||||
|
|
|
@ -30,7 +30,6 @@ if TYPE_CHECKING:
|
|||
from ...vocab import Vocab # noqa: F401
|
||||
|
||||
|
||||
@registry.architectures("spacy.PretrainVectors.v1")
|
||||
def create_pretrain_vectors(
|
||||
maxout_pieces: int, hidden_size: int, loss: str
|
||||
) -> Callable[["Vocab", Model], Model]:
|
||||
|
@ -57,7 +56,6 @@ def create_pretrain_vectors(
|
|||
return create_vectors_objective
|
||||
|
||||
|
||||
@registry.architectures("spacy.PretrainCharacters.v1")
|
||||
def create_pretrain_characters(
|
||||
maxout_pieces: int, hidden_size: int, n_characters: int
|
||||
) -> Callable[["Vocab", Model], Model]:
|
||||
|
|
|
@ -11,7 +11,6 @@ from .._precomputable_affine import PrecomputableAffine
|
|||
from ..tb_framework import TransitionModel
|
||||
|
||||
|
||||
@registry.architectures("spacy.TransitionBasedParser.v2")
|
||||
def build_tb_parser_model(
|
||||
tok2vec: Model[List[Doc], List[Floats2d]],
|
||||
state_type: Literal["parser", "ner"],
|
||||
|
|
|
@ -7,7 +7,6 @@ from ...tokens import Doc
|
|||
from ...util import registry
|
||||
|
||||
|
||||
@registry.architectures("spacy.Tagger.v2")
|
||||
def build_tagger_model(
|
||||
tok2vec: Model[List[Doc], List[Floats2d]], nO: Optional[int] = None, normalize=False
|
||||
) -> Model[List[Doc], List[Floats2d]]:
|
||||
|
|
|
@ -4,7 +4,6 @@ from ..util import registry
|
|||
from .parser_model import ParserStepModel
|
||||
|
||||
|
||||
@registry.layers("spacy.TransitionModel.v1")
|
||||
def TransitionModel(
|
||||
tok2vec, lower, upper, resize_output, dropout=0.2, unseen_classes=set()
|
||||
):
|
||||
|
|
|
@ -66,7 +66,6 @@ def parser_score(examples, **kwargs):
|
|||
return results
|
||||
|
||||
|
||||
@registry.scorers("spacy.parser_scorer.v1")
|
||||
def make_parser_scorer():
|
||||
return parser_score
|
||||
|
||||
|
|
|
@ -64,7 +64,6 @@ def morphologizer_score(examples, **kwargs):
|
|||
return results
|
||||
|
||||
|
||||
@registry.scorers("spacy.morphologizer_scorer.v1")
|
||||
def make_morphologizer_scorer():
|
||||
return morphologizer_score
|
||||
|
||||
|
|
|
@ -43,6 +43,13 @@ def populate_registry() -> None:
|
|||
make_preserve_existing_ents_filter,
|
||||
)
|
||||
from .pipeline.attributeruler import make_attribute_ruler_scorer
|
||||
from .pipeline.dep_parser import make_parser_scorer
|
||||
from .pipeline.morphologizer import make_morphologizer_scorer
|
||||
from .lang.ja import create_tokenizer as create_japanese_tokenizer
|
||||
from .lang.zh import create_chinese_tokenizer
|
||||
from .lang.ko import create_tokenizer as create_korean_tokenizer
|
||||
from .lang.vi import create_vietnamese_tokenizer
|
||||
from .lang.th import create_thai_tokenizer
|
||||
|
||||
# Import all pipeline components that were using registry decorators
|
||||
from .pipeline.tagger import make_tagger_scorer
|
||||
|
@ -65,6 +72,7 @@ def populate_registry() -> None:
|
|||
registry.misc("spacy.EmptyKB.v1")(empty_kb)
|
||||
registry.misc("spacy.CandidateGenerator.v1")(create_candidates)
|
||||
registry.misc("spacy.CandidateBatchGenerator.v1")(create_candidates_batch)
|
||||
registry.misc("spacy.LookupsDataLoader.v1")(load_lookups_data)
|
||||
|
||||
# Need to get references to the existing functions in registry by importing the function that is there
|
||||
# For the registry that was previously decorated
|
||||
|
@ -109,11 +117,13 @@ def populate_registry() -> None:
|
|||
)
|
||||
from .ml.models.span_finder import build_finder_model
|
||||
from .ml.models.parser import build_tb_parser_model
|
||||
from .ml.models.multi_task import create_pretrain_vectors
|
||||
from .ml.models.multi_task import create_pretrain_vectors, create_pretrain_characters
|
||||
from .ml.models.tagger import build_tagger_model
|
||||
from .ml.staticvectors import StaticVectors
|
||||
from .ml._precomputable_affine import PrecomputableAffine
|
||||
from .ml._character_embed import CharacterEmbed
|
||||
from .ml.tb_framework import TransitionModel
|
||||
from .language import create_tokenizer, load_lookups_data
|
||||
from .matcher.levenshtein import make_levenshtein_compare
|
||||
from .training.callbacks import create_copy_from_base_model
|
||||
from .ml.callbacks import create_models_with_nvtx_range, create_models_and_pipes_with_nvtx_range
|
||||
|
@ -150,6 +160,15 @@ def populate_registry() -> None:
|
|||
registry.scorers("spacy.entity_linker_scorer.v1")(make_entity_linker_scorer)
|
||||
registry.scorers("spacy.overlapping_labeled_spans_scorer.v1")(make_overlapping_labeled_spans_scorer)
|
||||
registry.scorers("spacy.attribute_ruler_scorer.v1")(make_attribute_ruler_scorer)
|
||||
registry.scorers("spacy.parser_scorer.v1")(make_parser_scorer)
|
||||
registry.scorers("spacy.morphologizer_scorer.v1")(make_morphologizer_scorer)
|
||||
|
||||
# Register tokenizers
|
||||
registry.tokenizers("spacy.ja.JapaneseTokenizer")(create_japanese_tokenizer)
|
||||
registry.tokenizers("spacy.zh.ChineseTokenizer")(create_chinese_tokenizer)
|
||||
registry.tokenizers("spacy.ko.KoreanTokenizer")(create_korean_tokenizer)
|
||||
registry.tokenizers("spacy.vi.VietnameseTokenizer")(create_vietnamese_tokenizer)
|
||||
registry.tokenizers("spacy.th.ThaiTokenizer")(create_thai_tokenizer)
|
||||
|
||||
# Register tok2vec architectures we've modified
|
||||
registry.architectures("spacy.Tok2VecListener.v1")(tok2vec_listener_v1)
|
||||
|
@ -172,6 +191,7 @@ def populate_registry() -> None:
|
|||
registry.architectures("spacy.SpanFinder.v1")(build_finder_model)
|
||||
registry.architectures("spacy.TransitionBasedParser.v2")(build_tb_parser_model)
|
||||
registry.architectures("spacy.PretrainVectors.v1")(create_pretrain_vectors)
|
||||
registry.architectures("spacy.PretrainCharacters.v1")(create_pretrain_characters)
|
||||
registry.architectures("spacy.Tagger.v2")(build_tagger_model)
|
||||
|
||||
# Register layers
|
||||
|
@ -183,6 +203,7 @@ def populate_registry() -> None:
|
|||
registry.layers("spacy.StaticVectors.v2")(StaticVectors)
|
||||
registry.layers("spacy.PrecomputableAffine.v1")(PrecomputableAffine)
|
||||
registry.layers("spacy.CharEmbed.v1")(CharacterEmbed)
|
||||
registry.layers("spacy.TransitionModel.v1")(TransitionModel)
|
||||
|
||||
# Register callbacks
|
||||
registry.callbacks("spacy.copy_from_base_model.v1")(create_copy_from_base_model)
|
||||
|
|
Loading…
Reference in New Issue
Block a user