diff --git a/spacy/lang/ja/__init__.py b/spacy/lang/ja/__init__.py
index 0d5f97ac8..e21e85cd9 100644
--- a/spacy/lang/ja/__init__.py
+++ b/spacy/lang/ja/__init__.py
@@ -32,7 +32,6 @@ split_mode = null
 """


-@registry.tokenizers("spacy.ja.JapaneseTokenizer")
 def create_tokenizer(split_mode: Optional[str] = None):
     def japanese_tokenizer_factory(nlp):
         return JapaneseTokenizer(nlp.vocab, split_mode=split_mode)
diff --git a/spacy/lang/ko/__init__.py b/spacy/lang/ko/__init__.py
index e2c860f7d..3231e191a 100644
--- a/spacy/lang/ko/__init__.py
+++ b/spacy/lang/ko/__init__.py
@@ -20,7 +20,6 @@ DEFAULT_CONFIG = """
 """


-@registry.tokenizers("spacy.ko.KoreanTokenizer")
 def create_tokenizer():
     def korean_tokenizer_factory(nlp):
         return KoreanTokenizer(nlp.vocab)
diff --git a/spacy/lang/th/__init__.py b/spacy/lang/th/__init__.py
index bd29d32a4..551f50eee 100644
--- a/spacy/lang/th/__init__.py
+++ b/spacy/lang/th/__init__.py
@@ -13,7 +13,6 @@ DEFAULT_CONFIG = """
 """


-@registry.tokenizers("spacy.th.ThaiTokenizer")
 def create_thai_tokenizer():
     def thai_tokenizer_factory(nlp):
         return ThaiTokenizer(nlp.vocab)
diff --git a/spacy/lang/vi/__init__.py b/spacy/lang/vi/__init__.py
index a621b8bfe..ae1fa469d 100644
--- a/spacy/lang/vi/__init__.py
+++ b/spacy/lang/vi/__init__.py
@@ -22,7 +22,6 @@ use_pyvi = true
 """


-@registry.tokenizers("spacy.vi.VietnameseTokenizer")
 def create_vietnamese_tokenizer(use_pyvi: bool = True):
     def vietnamese_tokenizer_factory(nlp):
         return VietnameseTokenizer(nlp.vocab, use_pyvi=use_pyvi)
diff --git a/spacy/lang/zh/__init__.py b/spacy/lang/zh/__init__.py
index f7bb09277..6ad044c60 100644
--- a/spacy/lang/zh/__init__.py
+++ b/spacy/lang/zh/__init__.py
@@ -46,7 +46,6 @@ class Segmenter(str, Enum):
         return list(cls.__members__.keys())


-@registry.tokenizers("spacy.zh.ChineseTokenizer")
 def create_chinese_tokenizer(segmenter: Segmenter = Segmenter.char):
     def chinese_tokenizer_factory(nlp):
         return ChineseTokenizer(nlp.vocab, segmenter=segmenter)
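Note on the pattern above: a catalogue-style registry decorator is just a function call that stores the decorated function in a named table, so removing `@registry.tokenizers(...)` from each module is behavior-preserving as long as the same call is made explicitly elsewhere (here, inside `populate_registry()` in spacy/registrations.py, further down). A minimal sketch of that equivalence using the `catalogue` package that spaCy's registry is built on; the namespace and function names below are hypothetical:

```python
import catalogue

# Stand-in registry table; spacy.util.registry.tokenizers is a
# catalogue.Registry like this one (the "mypkg" namespace is made up).
tokenizers = catalogue.create("mypkg", "tokenizers")

# Decorator form: registration happens as a side effect of module import.
@tokenizers.register("mypkg.DummyTokenizer.v1")
def create_dummy_tokenizer():
    def dummy_tokenizer_factory(nlp):
        return nlp.tokenizer  # placeholder factory body
    return dummy_tokenizer_factory

# Explicit form: the identical table entry, made whenever the caller
# chooses, e.g. inside a central populate_registry()-style function.
def create_plain_tokenizer():
    def plain_tokenizer_factory(nlp):
        return nlp.tokenizer
    return plain_tokenizer_factory

tokenizers.register("mypkg.PlainTokenizer.v1", func=create_plain_tokenizer)

# Both entries resolve by name in exactly the same way.
assert tokenizers.get("mypkg.DummyTokenizer.v1") is create_dummy_tokenizer
assert tokenizers.get("mypkg.PlainTokenizer.v1") is create_plain_tokenizer
```

The practical effect is that importing spacy.lang.ja and friends no longer mutates global registry state at import time; registration timing is controlled in one place.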
diff --git a/spacy/language.py b/spacy/language.py
index 93840c922..39c358232 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -104,7 +104,6 @@ class BaseDefaults:
     writing_system = {"direction": "ltr", "has_case": True, "has_letters": True}


-@registry.tokenizers("spacy.Tokenizer.v1")
 def create_tokenizer() -> Callable[["Language"], Tokenizer]:
     """Registered function to create a tokenizer. Returns a factory that takes
     the nlp object and returns a Tokenizer instance using the language detaults.
@@ -130,7 +129,6 @@ def create_tokenizer() -> Callable[["Language"], Tokenizer]:
     return tokenizer_factory


-@registry.misc("spacy.LookupsDataLoader.v1")
 def load_lookups_data(lang, tables):
     util.logger.debug("Loading lookups from spacy-lookups-data: %s", tables)
     lookups = load_lookups(lang=lang, tables=tables)
diff --git a/spacy/ml/models/multi_task.py b/spacy/ml/models/multi_task.py
index b7faf1cd7..7c68fe481 100644
--- a/spacy/ml/models/multi_task.py
+++ b/spacy/ml/models/multi_task.py
@@ -30,7 +30,6 @@ if TYPE_CHECKING:
     from ...vocab import Vocab  # noqa: F401


-@registry.architectures("spacy.PretrainVectors.v1")
 def create_pretrain_vectors(
     maxout_pieces: int, hidden_size: int, loss: str
 ) -> Callable[["Vocab", Model], Model]:
@@ -57,7 +56,6 @@ def create_pretrain_vectors(
     return create_vectors_objective


-@registry.architectures("spacy.PretrainCharacters.v1")
 def create_pretrain_characters(
     maxout_pieces: int, hidden_size: int, n_characters: int
 ) -> Callable[["Vocab", Model], Model]:
diff --git a/spacy/ml/models/parser.py b/spacy/ml/models/parser.py
index f6c0e565d..9ff0ac8ba 100644
--- a/spacy/ml/models/parser.py
+++ b/spacy/ml/models/parser.py
@@ -11,7 +11,6 @@ from .._precomputable_affine import PrecomputableAffine
 from ..tb_framework import TransitionModel


-@registry.architectures("spacy.TransitionBasedParser.v2")
 def build_tb_parser_model(
     tok2vec: Model[List[Doc], List[Floats2d]],
     state_type: Literal["parser", "ner"],
diff --git a/spacy/ml/models/tagger.py b/spacy/ml/models/tagger.py
index 8f1554fab..aec4276db 100644
--- a/spacy/ml/models/tagger.py
+++ b/spacy/ml/models/tagger.py
@@ -7,7 +7,6 @@ from ...tokens import Doc
 from ...util import registry


-@registry.architectures("spacy.Tagger.v2")
 def build_tagger_model(
     tok2vec: Model[List[Doc], List[Floats2d]], nO: Optional[int] = None, normalize=False
 ) -> Model[List[Doc], List[Floats2d]]:
diff --git a/spacy/ml/tb_framework.py b/spacy/ml/tb_framework.py
index e351ad4e5..16c894f6c 100644
--- a/spacy/ml/tb_framework.py
+++ b/spacy/ml/tb_framework.py
@@ -4,7 +4,6 @@ from ..util import registry
 from .parser_model import ParserStepModel


-@registry.layers("spacy.TransitionModel.v1")
 def TransitionModel(
     tok2vec, lower, upper, resize_output, dropout=0.2, unseen_classes=set()
 ):
diff --git a/spacy/pipeline/dep_parser.pyx b/spacy/pipeline/dep_parser.pyx
index fd85af775..881ec2dc4 100644
--- a/spacy/pipeline/dep_parser.pyx
+++ b/spacy/pipeline/dep_parser.pyx
@@ -66,7 +66,6 @@ def parser_score(examples, **kwargs):
     return results


-@registry.scorers("spacy.parser_scorer.v1")
 def make_parser_scorer():
     return parser_score

diff --git a/spacy/pipeline/morphologizer.pyx b/spacy/pipeline/morphologizer.pyx
index e93d04327..333f64d29 100644
--- a/spacy/pipeline/morphologizer.pyx
+++ b/spacy/pipeline/morphologizer.pyx
@@ -64,7 +64,6 @@ def morphologizer_score(examples, **kwargs):
     return results


-@registry.scorers("spacy.morphologizer_scorer.v1")
 def make_morphologizer_scorer():
     return morphologizer_score

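The architecture, layer, and scorer decorators removed above follow the same mechanics: the string names stay stable, only the registration site moves into `populate_registry()` (diff below). A hedged sketch of a lookup after the population pass, assuming a spaCy source tree with this patch applied:

```python
# Assumes a spaCy checkout with this patch applied.
from spacy.registrations import populate_registry
from spacy.util import registry

populate_registry()  # the central registration pass this patch extends

# Names that used to be registered via decorators resolve as before:
make_scorer = registry.scorers.get("spacy.parser_scorer.v1")
parser_score = make_scorer()  # the factory returns the parser_score function
```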
diff --git a/spacy/registrations.py b/spacy/registrations.py
index 9d13bb56c..e1ff3acbd 100644
--- a/spacy/registrations.py
+++ b/spacy/registrations.py
@@ -43,6 +43,14 @@ def populate_registry() -> None:
         make_preserve_existing_ents_filter,
     )
     from .pipeline.attributeruler import make_attribute_ruler_scorer
+    from .pipeline.dep_parser import make_parser_scorer
+    from .pipeline.morphologizer import make_morphologizer_scorer
+    from .lang.ja import create_tokenizer as create_japanese_tokenizer
+    from .lang.zh import create_chinese_tokenizer
+    from .lang.ko import create_tokenizer as create_korean_tokenizer
+    from .lang.vi import create_vietnamese_tokenizer
+    from .lang.th import create_thai_tokenizer
+    from .language import load_lookups_data

     # Import all pipeline components that were using registry decorators
     from .pipeline.tagger import make_tagger_scorer
@@ -65,6 +73,7 @@ def populate_registry() -> None:
     registry.misc("spacy.EmptyKB.v1")(empty_kb)
     registry.misc("spacy.CandidateGenerator.v1")(create_candidates)
     registry.misc("spacy.CandidateBatchGenerator.v1")(create_candidates_batch)
+    registry.misc("spacy.LookupsDataLoader.v1")(load_lookups_data)

     # Need to get references to the existing functions in registry by importing the function that is there
     # For the registry that was previously decorated
@@ -109,11 +118,13 @@ def populate_registry() -> None:
     )
     from .ml.models.span_finder import build_finder_model
     from .ml.models.parser import build_tb_parser_model
-    from .ml.models.multi_task import create_pretrain_vectors
+    from .ml.models.multi_task import create_pretrain_vectors, create_pretrain_characters
     from .ml.models.tagger import build_tagger_model
     from .ml.staticvectors import StaticVectors
     from .ml._precomputable_affine import PrecomputableAffine
     from .ml._character_embed import CharacterEmbed
+    from .ml.tb_framework import TransitionModel
+    from .language import create_tokenizer
     from .matcher.levenshtein import make_levenshtein_compare
     from .training.callbacks import create_copy_from_base_model
     from .ml.callbacks import create_models_with_nvtx_range, create_models_and_pipes_with_nvtx_range
@@ -150,6 +161,15 @@ def populate_registry() -> None:
     registry.scorers("spacy.entity_linker_scorer.v1")(make_entity_linker_scorer)
     registry.scorers("spacy.overlapping_labeled_spans_scorer.v1")(make_overlapping_labeled_spans_scorer)
     registry.scorers("spacy.attribute_ruler_scorer.v1")(make_attribute_ruler_scorer)
+    registry.scorers("spacy.parser_scorer.v1")(make_parser_scorer)
+    registry.scorers("spacy.morphologizer_scorer.v1")(make_morphologizer_scorer)
+
+    # Register tokenizers
+    registry.tokenizers("spacy.ja.JapaneseTokenizer")(create_japanese_tokenizer)
+    registry.tokenizers("spacy.zh.ChineseTokenizer")(create_chinese_tokenizer)
+    registry.tokenizers("spacy.ko.KoreanTokenizer")(create_korean_tokenizer)
+    registry.tokenizers("spacy.vi.VietnameseTokenizer")(create_vietnamese_tokenizer)
+    registry.tokenizers("spacy.th.ThaiTokenizer")(create_thai_tokenizer)

     # Register tok2vec architectures we've modified
     registry.architectures("spacy.Tok2VecListener.v1")(tok2vec_listener_v1)
@@ -172,6 +192,7 @@ def populate_registry() -> None:
     registry.architectures("spacy.SpanFinder.v1")(build_finder_model)
     registry.architectures("spacy.TransitionBasedParser.v2")(build_tb_parser_model)
     registry.architectures("spacy.PretrainVectors.v1")(create_pretrain_vectors)
+    registry.architectures("spacy.PretrainCharacters.v1")(create_pretrain_characters)
     registry.architectures("spacy.Tagger.v2")(build_tagger_model)

     # Register layers
@@ -183,6 +204,7 @@ def populate_registry() -> None:
     registry.layers("spacy.StaticVectors.v2")(StaticVectors)
     registry.layers("spacy.PrecomputableAffine.v1")(PrecomputableAffine)
     registry.layers("spacy.CharEmbed.v1")(CharacterEmbed)
+    registry.layers("spacy.TransitionModel.v1")(TransitionModel)

     # Register callbacks
     registry.callbacks("spacy.copy_from_base_model.v1")(create_copy_from_base_model)
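A refactor like this fails silently if a name loses its decorator but is never re-registered, so it is worth pinning every moved name in a test. A hypothetical sanity check, not part of this patch (`spacy.Tokenizer.v1` is left out because its re-registration falls outside the hunks shown):

```python
# Hypothetical regression test for this patch (assumed test file, not
# included in the diff). One entry per decorator removed above.
from spacy.registrations import populate_registry
from spacy.util import registry

populate_registry()

MOVED = [
    (registry.tokenizers, "spacy.ja.JapaneseTokenizer"),
    (registry.tokenizers, "spacy.ko.KoreanTokenizer"),
    (registry.tokenizers, "spacy.th.ThaiTokenizer"),
    (registry.tokenizers, "spacy.vi.VietnameseTokenizer"),
    (registry.tokenizers, "spacy.zh.ChineseTokenizer"),
    (registry.misc, "spacy.LookupsDataLoader.v1"),
    (registry.architectures, "spacy.PretrainVectors.v1"),
    (registry.architectures, "spacy.PretrainCharacters.v1"),
    (registry.architectures, "spacy.TransitionBasedParser.v2"),
    (registry.architectures, "spacy.Tagger.v2"),
    (registry.layers, "spacy.TransitionModel.v1"),
    (registry.scorers, "spacy.parser_scorer.v1"),
    (registry.scorers, "spacy.morphologizer_scorer.v1"),
]

for reg, name in MOVED:
    assert reg.has(name), f"{name} was not re-registered"
```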