mirror of
https://github.com/explosion/spaCy.git
synced 2026-01-08 01:31:19 +03:00
181 lines
8.2 KiB
Python
181 lines
8.2 KiB
Python
"""Centralized registry population for spaCy config
|
|
|
|
This module centralizes registry decorations to prevent the circular import
issues that arise with the Cython annotation changes from
``from __future__ import annotations``. The functions themselves remain in
their original locations; only their registry decoration is moved here.
|
|
|
|
Component definitions and registrations are in spacy/pipeline/factories.py
|
|
"""
|
|
# Module-level guard: stays False until populate_registry() has completed,
# which then flips it to True so later calls return immediately.
REGISTRY_POPULATED = False
|
|
|
|
|
|
def populate_registry() -> None:
    """Populate the registry with all necessary components.

    This function should be called before accessing the registry, to ensure
    it's populated. The function uses a global flag to prevent repopulation.

    All imports are performed inside the function body, and every one of
    them runs before the first registration call. A name imported inside a
    function is a local variable of that function, so referencing it ahead
    of its import statement raises ``UnboundLocalError``; keep new imports
    in the import section at the top of the body.

    The ``REGISTRY_POPULATED`` flag is only set to ``True`` after every
    registration below has run.
    """
    global REGISTRY_POPULATED
    if REGISTRY_POPULATED:
        return

    # ------------------------------------------------------------------
    # Imports (must all precede the registrations that use them)
    # ------------------------------------------------------------------

    # Pipeline scorers, suggesters and filters
    # NOTE(review): ``senter_score`` is registered as a scorer *factory*
    # under the alias ``make_sentencizer_scorer`` -- confirm this is the
    # intended callable.
    from .pipeline.entityruler import (
        make_entity_ruler_scorer as make_entityruler_scorer,
    )
    from .pipeline.lemmatizer import make_lemmatizer_scorer
    from .pipeline.ner import make_ner_scorer
    from .pipeline.sentencizer import senter_score as make_sentencizer_scorer
    from .pipeline.senter import make_senter_scorer
    from .pipeline.span_finder import make_span_finder_scorer
    from .pipeline.spancat import (
        build_ngram_range_suggester,
        build_ngram_suggester,
        build_preset_spans_suggester,
        make_spancat_scorer,
    )

    # Functions refactored by removing their direct registry decorators
    from .pipeline.entity_linker import make_entity_linker_scorer
    from .pipeline.span_ruler import (
        make_overlapping_labeled_spans_scorer,
        make_prioritize_new_ents_filter,
        make_preserve_existing_ents_filter,
    )
    from .pipeline.attributeruler import make_attribute_ruler_scorer

    # Pipeline components that were using registry decorators
    from .pipeline.tagger import make_tagger_scorer
    from .pipeline.textcat import make_textcat_scorer
    from .pipeline.textcat_multilabel import make_textcat_multilabel_scorer
    from .util import make_first_longest_spans_filter, registry

    # ML components that use the registry
    from .ml.models.tok2vec import (
        BiLSTMEncoder,
        CharacterEmbed,
        MaxoutWindowEncoder,
        MishWindowEncoder,
        MultiHashEmbed,
        build_hash_embed_cnn_tok2vec,
        build_Tok2Vec_model,
        tok2vec_listener_v1,
    )

    # Decorator-removed ML, matcher and training components
    from .ml.featureextractor import FeatureExtractor
    from .ml.extract_spans import extract_spans
    from .ml.extract_ngrams import extract_ngrams
    from .ml.models.entity_linker import (
        build_nel_encoder,
        load_kb,
        empty_kb_for_config,
        empty_kb,
        create_candidates,
        create_candidates_batch,
    )
    from .ml.models.textcat import (
        build_simple_cnn_text_classifier,
        build_bow_text_classifier,
        build_bow_text_classifier_v3,
        build_text_classifier_v2,
        build_text_classifier_lowdata,
        build_textcat_parametric_attention_v1,
        build_reduce_text_classifier,
    )
    from .ml.models.spancat import (
        build_linear_logistic,
        build_mean_max_reducer,
        build_spancat_model,
    )
    from .matcher.levenshtein import make_levenshtein_compare
    from .training.callbacks import create_copy_from_base_model
    from .training.loggers import console_logger, console_logger_v3
    from .training.batchers import (
        configure_minibatch_by_padded_size,
        configure_minibatch_by_words,
        configure_minibatch,
    )

    # ------------------------------------------------------------------
    # Registrations
    # ------------------------------------------------------------------

    # Register miscellaneous components
    registry.misc("spacy.first_longest_spans_filter.v1")(
        make_first_longest_spans_filter
    )
    registry.misc("spacy.ngram_suggester.v1")(build_ngram_suggester)
    registry.misc("spacy.ngram_range_suggester.v1")(build_ngram_range_suggester)
    registry.misc("spacy.preset_spans_suggester.v1")(build_preset_spans_suggester)
    registry.misc("spacy.prioritize_new_ents_filter.v1")(
        make_prioritize_new_ents_filter
    )
    registry.misc("spacy.prioritize_existing_ents_filter.v1")(
        make_preserve_existing_ents_filter
    )
    registry.misc("spacy.levenshtein_compare.v1")(make_levenshtein_compare)
    registry.misc("spacy.KBFromFile.v1")(load_kb)
    registry.misc("spacy.EmptyKB.v2")(empty_kb_for_config)
    registry.misc("spacy.EmptyKB.v1")(empty_kb)
    registry.misc("spacy.CandidateGenerator.v1")(create_candidates)
    registry.misc("spacy.CandidateBatchGenerator.v1")(create_candidates_batch)

    # Register scorers
    registry.scorers("spacy.tagger_scorer.v1")(make_tagger_scorer)
    registry.scorers("spacy.ner_scorer.v1")(make_ner_scorer)
    # span_ruler_scorer removed as it's not in span_ruler.py
    registry.scorers("spacy.entity_ruler_scorer.v1")(make_entityruler_scorer)
    registry.scorers("spacy.sentencizer_scorer.v1")(make_sentencizer_scorer)
    registry.scorers("spacy.senter_scorer.v1")(make_senter_scorer)
    registry.scorers("spacy.textcat_scorer.v1")(make_textcat_scorer)
    registry.scorers("spacy.textcat_scorer.v2")(make_textcat_scorer)
    registry.scorers("spacy.textcat_multilabel_scorer.v1")(
        make_textcat_multilabel_scorer
    )
    registry.scorers("spacy.textcat_multilabel_scorer.v2")(
        make_textcat_multilabel_scorer
    )
    registry.scorers("spacy.lemmatizer_scorer.v1")(make_lemmatizer_scorer)
    registry.scorers("spacy.span_finder_scorer.v1")(make_span_finder_scorer)
    registry.scorers("spacy.spancat_scorer.v1")(make_spancat_scorer)
    registry.scorers("spacy.entity_linker_scorer.v1")(make_entity_linker_scorer)
    registry.scorers("spacy.overlapping_labeled_spans_scorer.v1")(
        make_overlapping_labeled_spans_scorer
    )
    registry.scorers("spacy.attribute_ruler_scorer.v1")(make_attribute_ruler_scorer)

    # Register tok2vec architectures we've modified
    registry.architectures("spacy.Tok2VecListener.v1")(tok2vec_listener_v1)
    registry.architectures("spacy.HashEmbedCNN.v2")(build_hash_embed_cnn_tok2vec)
    registry.architectures("spacy.Tok2Vec.v2")(build_Tok2Vec_model)
    registry.architectures("spacy.MultiHashEmbed.v2")(MultiHashEmbed)
    registry.architectures("spacy.CharacterEmbed.v2")(CharacterEmbed)
    registry.architectures("spacy.MaxoutWindowEncoder.v2")(MaxoutWindowEncoder)
    registry.architectures("spacy.MishWindowEncoder.v2")(MishWindowEncoder)
    registry.architectures("spacy.TorchBiLSTMEncoder.v1")(BiLSTMEncoder)
    registry.architectures("spacy.EntityLinker.v2")(build_nel_encoder)
    registry.architectures("spacy.TextCatCNN.v2")(build_simple_cnn_text_classifier)
    registry.architectures("spacy.TextCatBOW.v2")(build_bow_text_classifier)
    registry.architectures("spacy.TextCatBOW.v3")(build_bow_text_classifier_v3)
    registry.architectures("spacy.TextCatEnsemble.v2")(build_text_classifier_v2)
    registry.architectures("spacy.TextCatLowData.v1")(build_text_classifier_lowdata)
    registry.architectures("spacy.TextCatParametricAttention.v1")(
        build_textcat_parametric_attention_v1
    )
    registry.architectures("spacy.TextCatReduce.v1")(build_reduce_text_classifier)
    registry.architectures("spacy.SpanCategorizer.v1")(build_spancat_model)

    # Register layers
    registry.layers("spacy.FeatureExtractor.v1")(FeatureExtractor)
    registry.layers("spacy.extract_spans.v1")(extract_spans)
    registry.layers("spacy.extract_ngrams.v1")(extract_ngrams)
    registry.layers("spacy.LinearLogistic.v1")(build_linear_logistic)
    registry.layers("spacy.mean_max_reducer.v1")(build_mean_max_reducer)

    # Register callbacks
    registry.callbacks("spacy.copy_from_base_model.v1")(create_copy_from_base_model)

    # Register loggers
    registry.loggers("spacy.ConsoleLogger.v2")(console_logger)
    registry.loggers("spacy.ConsoleLogger.v3")(console_logger_v3)

    # Register batchers
    registry.batchers("spacy.batch_by_padded.v1")(configure_minibatch_by_padded_size)
    registry.batchers("spacy.batch_by_words.v1")(configure_minibatch_by_words)
    registry.batchers("spacy.batch_by_sequence.v1")(configure_minibatch)

    # Set the flag to indicate that the registry has been populated
    REGISTRY_POPULATED = True
|