Move imports

This commit is contained in:
Matthew Honnibal 2025-05-20 18:34:01 +02:00
parent fd91e8a0e4
commit 216ef7ee7a

View File

@@ -25,6 +25,40 @@ from .pipeline.dep_parser import DependencyParser
 from .pipeline.tagger import Tagger
 from .pipeline.multitask import MultitaskObjective
 from .pipeline.senter import SentenceRecognizer
+from .language import Language
+from .pipeline.sentencizer import Sentencizer
+# Import factory default configurations
+from .pipeline.entity_linker import DEFAULT_NEL_MODEL
+from .pipeline.entityruler import DEFAULT_ENT_ID_SEP
+from .pipeline.tok2vec import DEFAULT_TOK2VEC_MODEL
+from .pipeline.senter import DEFAULT_SENTER_MODEL
+from .pipeline.morphologizer import DEFAULT_MORPH_MODEL
+from .pipeline.spancat import (
+    DEFAULT_SPANCAT_MODEL,
+    DEFAULT_SPANCAT_SINGLELABEL_MODEL,
+    DEFAULT_SPANS_KEY,
+)
+from .pipeline.span_ruler import DEFAULT_SPANS_KEY as SPAN_RULER_DEFAULT_SPANS_KEY
+from .pipeline.edit_tree_lemmatizer import DEFAULT_EDIT_TREE_LEMMATIZER_MODEL
+from .pipeline.textcat_multilabel import DEFAULT_MULTI_TEXTCAT_MODEL
+from .pipeline.span_finder import DEFAULT_SPAN_FINDER_MODEL
+from .pipeline.ner import DEFAULT_NER_MODEL
+from .pipeline.dep_parser import DEFAULT_PARSER_MODEL
+from .pipeline.tagger import DEFAULT_TAGGER_MODEL
+from .pipeline.multitask import DEFAULT_MT_MODEL
+from .pipeline.textcat import DEFAULT_SINGLE_TEXTCAT_MODEL
+from .pipeline.entity_linker import EntityLinker, EntityLinker_v1
+from .pipeline.attributeruler import AttributeRuler
+from .pipeline.spancat import SpanCategorizer
+from .pipeline.lemmatizer import Lemmatizer
+from .pipeline.functions import TokenSplitter
+from .pipeline.functions import DocCleaner
+from .pipeline.span_ruler import SpanRuler, prioritize_new_ents_filter, prioritize_existing_ents_filter
+from .pipeline.span_ruler import SpanRuler
+from .pipeline.edit_tree_lemmatizer import EditTreeLemmatizer
+from .pipeline.morphologizer import Morphologizer
 # Global flag to track if registry has been populated
 REGISTRY_POPULATED = False
@@ -135,30 +169,6 @@ def register_factories() -> None:
     if FACTORIES_REGISTERED:
         return
-    from .language import Language
-    from .pipeline.sentencizer import Sentencizer
-    # Import factory default configurations
-    from .pipeline.entity_linker import DEFAULT_NEL_MODEL
-    from .pipeline.entityruler import DEFAULT_ENT_ID_SEP
-    from .pipeline.tok2vec import DEFAULT_TOK2VEC_MODEL
-    from .pipeline.senter import DEFAULT_SENTER_MODEL
-    from .pipeline.morphologizer import DEFAULT_MORPH_MODEL
-    from .pipeline.spancat import (
-        DEFAULT_SPANCAT_MODEL,
-        DEFAULT_SPANCAT_SINGLELABEL_MODEL,
-        DEFAULT_SPANS_KEY,
-    )
-    from .pipeline.span_ruler import DEFAULT_SPANS_KEY as SPAN_RULER_DEFAULT_SPANS_KEY
-    from .pipeline.edit_tree_lemmatizer import DEFAULT_EDIT_TREE_LEMMATIZER_MODEL
-    from .pipeline.textcat_multilabel import DEFAULT_MULTI_TEXTCAT_MODEL
-    from .pipeline.span_finder import DEFAULT_SPAN_FINDER_MODEL
-    from .pipeline.ner import DEFAULT_NER_MODEL
-    from .pipeline.dep_parser import DEFAULT_PARSER_MODEL
-    from .pipeline.tagger import DEFAULT_TAGGER_MODEL
-    from .pipeline.multitask import DEFAULT_MT_MODEL
-    from .pipeline.textcat import DEFAULT_SINGLE_TEXTCAT_MODEL
     # We can't have function implementations for these factories in Cython, because
     # we need to build a Pydantic model for them dynamically, reading their argument
     # structure from the signature. In Cython 3, this doesn't work because the
@@ -178,7 +188,6 @@ def register_factories() -> None:
     def make_attribute_ruler(
         nlp: Language, name: str, validate: bool, scorer: Optional[Callable]
     ):
-        from .pipeline.attributeruler import AttributeRuler
         return AttributeRuler(nlp.vocab, name, validate=validate, scorer=scorer)
 
     def make_entity_linker(
@@ -202,7 +211,6 @@ def register_factories() -> None:
         candidates_batch_size: int,
         threshold: Optional[float] = None,
     ):
-        from .pipeline.entity_linker import EntityLinker, EntityLinker_v1
 
         if not model.attrs.get("include_span_maker", False):
             # The only difference in arguments here is that use_gold_ents and threshold aren't available.
@@ -246,7 +254,6 @@ def register_factories() -> None:
         overwrite: bool,
         scorer: Optional[Callable],
     ):
-        from .pipeline.lemmatizer import Lemmatizer
         return Lemmatizer(
             nlp.vocab, model, name, mode=mode, overwrite=overwrite, scorer=scorer
         )
@@ -263,11 +270,9 @@ def register_factories() -> None:
     def make_token_splitter(
         nlp: Language, name: str, *, min_length: int = 0, split_length: int = 0
     ):
-        from .pipeline.functions import TokenSplitter
         return TokenSplitter(min_length=min_length, split_length=split_length)
 
     def make_doc_cleaner(nlp: Language, name: str, *, attrs: Dict[str, Any], silent: bool):
-        from .pipeline.functions import DocCleaner
         return DocCleaner(attrs, silent=silent)
 
     def make_tok2vec(nlp: Language, name: str, model: Model) -> Tok2Vec:
@@ -306,8 +311,7 @@ def register_factories() -> None:
         negative_weight: float,
         allow_overlap: bool,
         scorer: Optional[Callable],
-    ) -> "SpanCategorizer":
-        from .pipeline.spancat import SpanCategorizer
+    ) -> SpanCategorizer:
         return SpanCategorizer(
             nlp.vocab,
             model=model,
@@ -332,7 +336,6 @@ def register_factories() -> None:
         scorer: Optional[Callable],
         ent_id_sep: str,
     ):
-        from .pipeline.span_ruler import SpanRuler, prioritize_new_ents_filter, prioritize_existing_ents_filter
         if overwrite_ents:
             ents_filter = prioritize_new_ents_filter
         else:
@@ -385,7 +388,6 @@ def register_factories() -> None:
         overwrite: bool,
         scorer: Optional[Callable],
     ):
-        from .pipeline.span_ruler import SpanRuler
         return SpanRuler(
             nlp,
             name,
@@ -410,7 +412,6 @@ def register_factories() -> None:
         top_k: int,
         scorer: Optional[Callable],
     ):
-        from .pipeline.edit_tree_lemmatizer import EditTreeLemmatizer
         return EditTreeLemmatizer(
             nlp.vocab,
             model,
@@ -583,7 +584,6 @@ def register_factories() -> None:
         label_smoothing: float,
         scorer: Optional[Callable],
     ):
-        from .pipeline.morphologizer import Morphologizer
         return Morphologizer(
             nlp.vocab, model, name,
             overwrite=overwrite,