diff --git a/spacy/matcher/levenshtein.pyx b/spacy/matcher/levenshtein.pyx index 1f9cd1ba2..1bafdbbcb 100644 --- a/spacy/matcher/levenshtein.pyx +++ b/spacy/matcher/levenshtein.pyx @@ -27,6 +27,5 @@ cpdef bint levenshtein_compare(input_text: str, pattern_text: str, fuzzy: int = return levenshtein(input_text, pattern_text, max_edits) <= max_edits -@registry.misc("spacy.levenshtein_compare.v1") def make_levenshtein_compare(): return levenshtein_compare diff --git a/spacy/ml/extract_ngrams.py b/spacy/ml/extract_ngrams.py index ce7c585cc..d57197312 100644 --- a/spacy/ml/extract_ngrams.py +++ b/spacy/ml/extract_ngrams.py @@ -4,7 +4,6 @@ from ..attrs import LOWER from ..util import registry -@registry.layers("spacy.extract_ngrams.v1") def extract_ngrams(ngram_size: int, attr: int = LOWER) -> Model: model: Model = Model("extract_ngrams", forward) model.attrs["ngram_size"] = ngram_size diff --git a/spacy/ml/extract_spans.py b/spacy/ml/extract_spans.py index ac0f5fa1b..d3456b705 100644 --- a/spacy/ml/extract_spans.py +++ b/spacy/ml/extract_spans.py @@ -6,7 +6,6 @@ from thinc.types import Ints1d, Ragged from ..util import registry -@registry.layers("spacy.extract_spans.v1") def extract_spans() -> Model[Tuple[Ragged, Ragged], Ragged]: """Extract spans from a sequence of source arrays, as specified by an array of (start, end) indices. The output is a ragged array of the diff --git a/spacy/ml/featureextractor.py b/spacy/ml/featureextractor.py index a87b4156c..2f869ad65 100644 --- a/spacy/ml/featureextractor.py +++ b/spacy/ml/featureextractor.py @@ -6,7 +6,6 @@ from thinc.types import Ints2d from ..tokens import Doc -@registry.layers("spacy.FeatureExtractor.v1") def FeatureExtractor( columns: Union[List[str], List[int], List[Union[int, str]]] ) -> Model[List[Doc], List[Ints2d]]: diff --git a/spacy/ml/models/entity_linker.py b/spacy/ml/models/entity_linker.py index b7100c00a..752d1c443 100644 --- a/spacy/ml/models/entity_linker.py +++ b/spacy/ml/models/entity_linker.py @@ -28,7 +28,6 @@ from ...vocab import Vocab from ..extract_spans import extract_spans -@registry.architectures("spacy.EntityLinker.v2") def build_nel_encoder( tok2vec: Model, nO: Optional[int] = None ) -> Model[List[Doc], Floats2d]: @@ -92,7 +91,6 @@ def span_maker_forward(model, docs: List[Doc], is_train) -> Tuple[Ragged, Callab return out, lambda x: [] -@registry.misc("spacy.KBFromFile.v1") def load_kb( kb_path: Path, ) -> Callable[[Vocab], KnowledgeBase]: @@ -104,7 +102,6 @@ def load_kb( return kb_from_file -@registry.misc("spacy.EmptyKB.v2") def empty_kb_for_config() -> Callable[[Vocab, int], KnowledgeBase]: def empty_kb_factory(vocab: Vocab, entity_vector_length: int): return InMemoryLookupKB(vocab=vocab, entity_vector_length=entity_vector_length) @@ -112,7 +109,6 @@ def empty_kb_for_config() -> Callable[[Vocab, int], KnowledgeBase]: return empty_kb_factory -@registry.misc("spacy.EmptyKB.v1") def empty_kb( entity_vector_length: int, ) -> Callable[[Vocab], KnowledgeBase]: @@ -122,12 +118,10 @@ def empty_kb( return empty_kb_factory -@registry.misc("spacy.CandidateGenerator.v1") def create_candidates() -> Callable[[KnowledgeBase, Span], Iterable[Candidate]]: return get_candidates -@registry.misc("spacy.CandidateBatchGenerator.v1") def create_candidates_batch() -> Callable[ [KnowledgeBase, Iterable[Span]], Iterable[Iterable[Candidate]] ]: diff --git a/spacy/registrations.py b/spacy/registrations.py index 3133197c4..aa7463998 100644 --- a/spacy/registrations.py +++ b/spacy/registrations.py @@ -59,6 +59,12 @@ def populate_registry() -> None: registry.misc("spacy.preset_spans_suggester.v1")(build_preset_spans_suggester) registry.misc("spacy.prioritize_new_ents_filter.v1")(make_prioritize_new_ents_filter) registry.misc("spacy.prioritize_existing_ents_filter.v1")(make_preserve_existing_ents_filter) + registry.misc("spacy.levenshtein_compare.v1")(make_levenshtein_compare) + registry.misc("spacy.KBFromFile.v1")(load_kb) + registry.misc("spacy.EmptyKB.v2")(empty_kb_for_config) + registry.misc("spacy.EmptyKB.v1")(empty_kb) + registry.misc("spacy.CandidateGenerator.v1")(create_candidates) + registry.misc("spacy.CandidateBatchGenerator.v1")(create_candidates_batch) # Need to get references to the existing functions in registry by importing the function that is there # For the registry that was previously decorated @@ -74,6 +80,20 @@ def populate_registry() -> None: build_Tok2Vec_model, tok2vec_listener_v1, ) + + # Import decorator-removed ML components + from .ml.featureextractor import FeatureExtractor + from .ml.extract_spans import extract_spans + from .ml.extract_ngrams import extract_ngrams + from .ml.models.entity_linker import ( + build_nel_encoder, + load_kb, + empty_kb_for_config, + empty_kb, + create_candidates, + create_candidates_batch + ) + from .matcher.levenshtein import make_levenshtein_compare # Register scorers registry.scorers("spacy.tagger_scorer.v1")(make_tagger_scorer) @@ -106,6 +126,12 @@ def populate_registry() -> None: registry.architectures("spacy.MaxoutWindowEncoder.v2")(MaxoutWindowEncoder) registry.architectures("spacy.MishWindowEncoder.v2")(MishWindowEncoder) registry.architectures("spacy.TorchBiLSTMEncoder.v1")(BiLSTMEncoder) + registry.architectures("spacy.EntityLinker.v2")(build_nel_encoder) + + # Register layers + registry.layers("spacy.FeatureExtractor.v1")(FeatureExtractor) + registry.layers("spacy.extract_spans.v1")(extract_spans) + registry.layers("spacy.extract_ngrams.v1")(extract_ngrams) # Set the flag to indicate that the registry has been populated REGISTRY_POPULATED = True