Move more layers

This commit is contained in:
Matthew Honnibal 2025-05-21 22:53:07 +02:00
parent 959b80d823
commit 24b0670be7
6 changed files with 26 additions and 10 deletions

View File

@ -27,6 +27,5 @@ cpdef bint levenshtein_compare(input_text: str, pattern_text: str, fuzzy: int =
return levenshtein(input_text, pattern_text, max_edits) <= max_edits
@registry.misc("spacy.levenshtein_compare.v1")
def make_levenshtein_compare():
    """Factory registered under ``spacy.levenshtein_compare.v1``.

    Takes no arguments and hands back this module's ``levenshtein_compare``
    callable unchanged, so it can be resolved by name through the registry.
    """
    compare_fn = levenshtein_compare
    return compare_fn

View File

@ -4,7 +4,6 @@ from ..attrs import LOWER
from ..util import registry
@registry.layers("spacy.extract_ngrams.v1")
def extract_ngrams(ngram_size: int, attr: int = LOWER) -> Model:
model: Model = Model("extract_ngrams", forward)
model.attrs["ngram_size"] = ngram_size

View File

@ -6,7 +6,6 @@ from thinc.types import Ints1d, Ragged
from ..util import registry
@registry.layers("spacy.extract_spans.v1")
def extract_spans() -> Model[Tuple[Ragged, Ragged], Ragged]:
"""Extract spans from a sequence of source arrays, as specified by an array
of (start, end) indices. The output is a ragged array of the

View File

@ -6,7 +6,6 @@ from thinc.types import Ints2d
from ..tokens import Doc
@registry.layers("spacy.FeatureExtractor.v1")
def FeatureExtractor(
columns: Union[List[str], List[int], List[Union[int, str]]]
) -> Model[List[Doc], List[Ints2d]]:

View File

@ -28,7 +28,6 @@ from ...vocab import Vocab
from ..extract_spans import extract_spans
@registry.architectures("spacy.EntityLinker.v2")
def build_nel_encoder(
tok2vec: Model, nO: Optional[int] = None
) -> Model[List[Doc], Floats2d]:
@ -92,7 +91,6 @@ def span_maker_forward(model, docs: List[Doc], is_train) -> Tuple[Ragged, Callab
return out, lambda x: []
@registry.misc("spacy.KBFromFile.v1")
def load_kb(
kb_path: Path,
) -> Callable[[Vocab], KnowledgeBase]:
@ -104,7 +102,6 @@ def load_kb(
return kb_from_file
@registry.misc("spacy.EmptyKB.v2")
def empty_kb_for_config() -> Callable[[Vocab, int], KnowledgeBase]:
def empty_kb_factory(vocab: Vocab, entity_vector_length: int):
return InMemoryLookupKB(vocab=vocab, entity_vector_length=entity_vector_length)
@ -112,7 +109,6 @@ def empty_kb_for_config() -> Callable[[Vocab, int], KnowledgeBase]:
return empty_kb_factory
@registry.misc("spacy.EmptyKB.v1")
def empty_kb(
entity_vector_length: int,
) -> Callable[[Vocab], KnowledgeBase]:
@ -122,12 +118,10 @@ def empty_kb(
return empty_kb_factory
@registry.misc("spacy.CandidateGenerator.v1")
def create_candidates() -> Callable[[KnowledgeBase, Span], Iterable[Candidate]]:
    """Factory registered under ``spacy.CandidateGenerator.v1``.

    Takes no arguments and hands back ``get_candidates`` unchanged. Per the
    return annotation, that callable accepts a ``KnowledgeBase`` and a
    ``Span`` and yields ``Candidate`` objects.
    """
    candidate_fn = get_candidates
    return candidate_fn
@registry.misc("spacy.CandidateBatchGenerator.v1")
def create_candidates_batch() -> Callable[
[KnowledgeBase, Iterable[Span]], Iterable[Iterable[Candidate]]
]:

View File

@ -59,6 +59,12 @@ def populate_registry() -> None:
registry.misc("spacy.preset_spans_suggester.v1")(build_preset_spans_suggester)
registry.misc("spacy.prioritize_new_ents_filter.v1")(make_prioritize_new_ents_filter)
registry.misc("spacy.prioritize_existing_ents_filter.v1")(make_preserve_existing_ents_filter)
registry.misc("spacy.levenshtein_compare.v1")(make_levenshtein_compare)
registry.misc("spacy.KBFromFile.v1")(load_kb)
registry.misc("spacy.EmptyKB.v2")(empty_kb_for_config)
registry.misc("spacy.EmptyKB.v1")(empty_kb)
registry.misc("spacy.CandidateGenerator.v1")(create_candidates)
registry.misc("spacy.CandidateBatchGenerator.v1")(create_candidates_batch)
# Functions that used to be registered via decorators must now be imported
# here so that references to them are available for explicit registration.
@ -74,6 +80,20 @@ def populate_registry() -> None:
build_Tok2Vec_model,
tok2vec_listener_v1,
)
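# Import ML components whose registry decorators were removed
# NOTE(review): some of these names (load_kb, empty_kb, create_candidates, ...)
# are passed to registry.misc(...) earlier in this function; a function-local
# import makes the name local, so it must run before first use - confirm order.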
from .ml.featureextractor import FeatureExtractor
from .ml.extract_spans import extract_spans
from .ml.extract_ngrams import extract_ngrams
from .ml.models.entity_linker import (
build_nel_encoder,
load_kb,
empty_kb_for_config,
empty_kb,
create_candidates,
create_candidates_batch
)
from .matcher.levenshtein import make_levenshtein_compare
# Register scorers
registry.scorers("spacy.tagger_scorer.v1")(make_tagger_scorer)
@ -106,6 +126,12 @@ def populate_registry() -> None:
registry.architectures("spacy.MaxoutWindowEncoder.v2")(MaxoutWindowEncoder)
registry.architectures("spacy.MishWindowEncoder.v2")(MishWindowEncoder)
registry.architectures("spacy.TorchBiLSTMEncoder.v1")(BiLSTMEncoder)
registry.architectures("spacy.EntityLinker.v2")(build_nel_encoder)
# Register layers
registry.layers("spacy.FeatureExtractor.v1")(FeatureExtractor)
registry.layers("spacy.extract_spans.v1")(extract_spans)
registry.layers("spacy.extract_ngrams.v1")(extract_ngrams)
# Set the flag to indicate that the registry has been populated
REGISTRY_POPULATED = True