This commit is contained in:
Matthew Honnibal 2025-05-21 20:48:05 +02:00
parent 0630d62264
commit b8db76ccbe
26 changed files with 104 additions and 104 deletions

View File

@ -17,9 +17,9 @@ from .cli.info import info # noqa: F401
from .errors import Errors
from .glossary import explain # noqa: F401
from .language import Language
from .registrations import REGISTRY_POPULATED, populate_registry
from .util import logger, registry # noqa: F401
from .vocab import Vocab
from .registrations import populate_registry, REGISTRY_POPULATED
if sys.maxunicode == 65535:
raise SystemError(Errors.E130)

View File

@ -1,7 +1,7 @@
import importlib
import sys
from pathlib import Path
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
import sys
import importlib
import srsly

View File

@ -1,8 +1,8 @@
# cython: infer_types=True, binding=True
from collections import defaultdict
from typing import Callable, Optional
import importlib
import sys
from collections import defaultdict
from typing import Callable, Optional
from thinc.api import Config, Model

View File

@ -1,7 +1,7 @@
from collections import Counter
from itertools import islice
import importlib
import sys
from collections import Counter
from itertools import islice
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, cast
import numpy as np

View File

@ -1,9 +1,9 @@
import importlib
import random
import sys
from itertools import islice
from pathlib import Path
from typing import Any, Callable, Dict, Iterable, List, Optional, Union
import sys
import importlib
import srsly
from thinc.api import Config, CosineDistance, Model, Optimizer, set_dropout_rate

View File

@ -1,8 +1,8 @@
import importlib
import sys
import warnings
from collections import defaultdict
from pathlib import Path
import importlib
import sys
from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Tuple, Union
import srsly

View File

@ -1,58 +1,52 @@
from typing import Dict, Any, Callable, Iterable, List, Optional, Union, Tuple
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
from thinc.api import Model
from thinc.types import Floats2d, Ragged
from ..tokens.doc import Doc
from ..tokens.span import Span
from ..kb import KnowledgeBase, Candidate
from ..vocab import Vocab
from ..pipeline.textcat import TextCategorizer
from ..pipeline.tok2vec import Tok2Vec
from ..pipeline.spancat import SpanCategorizer, Suggester
from ..pipeline.textcat_multilabel import MultiLabel_TextCategorizer
from ..pipeline.entityruler import EntityRuler
from ..pipeline.span_finder import SpanFinder
from ..pipeline.ner import EntityRecognizer
from ..pipeline._parser_internals.transition_system import TransitionSystem
from ..pipeline.dep_parser import DependencyParser
from ..pipeline.tagger import Tagger
from ..pipeline.multitask import MultitaskObjective
from ..pipeline.senter import SentenceRecognizer
from ..kb import Candidate, KnowledgeBase
from ..language import Language
from ..pipeline.sentencizer import Sentencizer
from ..pipeline._parser_internals.transition_system import TransitionSystem
from ..pipeline.attributeruler import AttributeRuler
from ..pipeline.dep_parser import DEFAULT_PARSER_MODEL, DependencyParser
from ..pipeline.edit_tree_lemmatizer import (
DEFAULT_EDIT_TREE_LEMMATIZER_MODEL,
EditTreeLemmatizer,
)
# Import factory default configurations
from ..pipeline.entity_linker import DEFAULT_NEL_MODEL
from ..pipeline.entityruler import DEFAULT_ENT_ID_SEP
from ..pipeline.tok2vec import DEFAULT_TOK2VEC_MODEL
from ..pipeline.senter import DEFAULT_SENTER_MODEL
from ..pipeline.morphologizer import DEFAULT_MORPH_MODEL
from ..pipeline.entity_linker import DEFAULT_NEL_MODEL, EntityLinker, EntityLinker_v1
from ..pipeline.entityruler import DEFAULT_ENT_ID_SEP, EntityRuler
from ..pipeline.functions import DocCleaner, TokenSplitter
from ..pipeline.lemmatizer import Lemmatizer
from ..pipeline.morphologizer import DEFAULT_MORPH_MODEL, Morphologizer
from ..pipeline.multitask import DEFAULT_MT_MODEL, MultitaskObjective
from ..pipeline.ner import DEFAULT_NER_MODEL, EntityRecognizer
from ..pipeline.sentencizer import Sentencizer
from ..pipeline.senter import DEFAULT_SENTER_MODEL, SentenceRecognizer
from ..pipeline.span_finder import DEFAULT_SPAN_FINDER_MODEL, SpanFinder
from ..pipeline.span_ruler import DEFAULT_SPANS_KEY as SPAN_RULER_DEFAULT_SPANS_KEY
from ..pipeline.span_ruler import (
SpanRuler,
prioritize_existing_ents_filter,
prioritize_new_ents_filter,
)
from ..pipeline.spancat import (
DEFAULT_SPANCAT_MODEL,
DEFAULT_SPANCAT_SINGLELABEL_MODEL,
DEFAULT_SPANS_KEY,
SpanCategorizer,
Suggester,
)
from ..pipeline.span_ruler import DEFAULT_SPANS_KEY as SPAN_RULER_DEFAULT_SPANS_KEY
from ..pipeline.edit_tree_lemmatizer import DEFAULT_EDIT_TREE_LEMMATIZER_MODEL
from ..pipeline.textcat_multilabel import DEFAULT_MULTI_TEXTCAT_MODEL
from ..pipeline.span_finder import DEFAULT_SPAN_FINDER_MODEL
from ..pipeline.ner import DEFAULT_NER_MODEL
from ..pipeline.dep_parser import DEFAULT_PARSER_MODEL
from ..pipeline.tagger import DEFAULT_TAGGER_MODEL
from ..pipeline.multitask import DEFAULT_MT_MODEL
from ..pipeline.textcat import DEFAULT_SINGLE_TEXTCAT_MODEL
from ..pipeline.entity_linker import EntityLinker, EntityLinker_v1
from ..pipeline.attributeruler import AttributeRuler
from ..pipeline.lemmatizer import Lemmatizer
from ..pipeline.functions import TokenSplitter
from ..pipeline.functions import DocCleaner
from ..pipeline.span_ruler import (
SpanRuler,
prioritize_new_ents_filter,
prioritize_existing_ents_filter,
from ..pipeline.tagger import DEFAULT_TAGGER_MODEL, Tagger
from ..pipeline.textcat import DEFAULT_SINGLE_TEXTCAT_MODEL, TextCategorizer
from ..pipeline.textcat_multilabel import (
DEFAULT_MULTI_TEXTCAT_MODEL,
MultiLabel_TextCategorizer,
)
from ..pipeline.edit_tree_lemmatizer import EditTreeLemmatizer
from ..pipeline.morphologizer import Morphologizer
from ..pipeline.tok2vec import DEFAULT_TOK2VEC_MODEL, Tok2Vec
from ..tokens.doc import Doc
from ..tokens.span import Span
from ..vocab import Vocab
# Global flag to track if factories have been registered
FACTORIES_REGISTERED = False

View File

@ -1,7 +1,7 @@
import importlib
import sys
import warnings
from typing import Any, Dict
import sys
import importlib
import srsly

View File

@ -1,7 +1,7 @@
import warnings
from pathlib import Path
import importlib
import sys
import warnings
from pathlib import Path
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
from thinc.api import Model

View File

@ -1,8 +1,8 @@
# cython: infer_types=True, binding=True
import importlib
import sys
from itertools import islice
from typing import Callable, Dict, Optional, Union
import sys
import importlib
from thinc.api import Config, Model, SequenceCategoricalCrossentropy

View File

@ -1,7 +1,7 @@
# cython: infer_types=True, binding=True
from typing import Optional
import sys
import importlib
import sys
from typing import Optional
import numpy
from thinc.api import Config, CosineDistance, Model, set_dropout_rate, to_categorical

View File

@ -1,8 +1,8 @@
# cython: infer_types=True, binding=True
from collections import defaultdict
from typing import Callable, Optional
import importlib
import sys
from collections import defaultdict
from typing import Callable, Optional
from thinc.api import Config, Model

View File

@ -1,7 +1,7 @@
# cython: infer_types=True, binding=True
from typing import Callable, List, Optional
import importlib
import sys
from typing import Callable, List, Optional
import srsly

View File

@ -1,8 +1,8 @@
# cython: infer_types=True, binding=True
import importlib
import sys
from itertools import islice
from typing import Callable, Optional
import sys
import importlib
from thinc.api import Config, Model, SequenceCategoricalCrossentropy

View File

@ -1,6 +1,6 @@
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple
import sys
import importlib
import sys
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple
from thinc.api import Config, Model, Optimizer, set_dropout_rate
from thinc.types import Floats2d

View File

@ -1,8 +1,8 @@
import importlib
import sys
import warnings
from functools import partial
from pathlib import Path
import importlib
import sys
from typing import (
Any,
Callable,

View File

@ -1,7 +1,7 @@
from dataclasses import dataclass
from functools import partial
import importlib
import sys
from dataclasses import dataclass
from functools import partial
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union, cast
import numpy

View File

@ -1,8 +1,8 @@
# cython: infer_types=True, binding=True
from itertools import islice
from typing import Callable, Optional
import importlib
import sys
from itertools import islice
from typing import Callable, Optional
import numpy
from thinc.api import Config, Model, SequenceCategoricalCrossentropy, set_dropout_rate

View File

@ -1,11 +1,11 @@
import importlib
import sys
from itertools import islice
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple
import numpy
from thinc.api import Config, Model, Optimizer, get_array_module, set_dropout_rate
from thinc.types import Floats2d
import sys
import importlib
from ..errors import Errors
from ..language import Language

View File

@ -1,7 +1,7 @@
import importlib
import sys
from itertools import islice
from typing import Any, Callable, Dict, Iterable, List, Optional
import sys
import importlib
from thinc.api import Config, Model
from thinc.types import Floats2d

View File

@ -1,6 +1,6 @@
from itertools import islice
import importlib
import sys
from itertools import islice
from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence
from thinc.api import Config, Model, Optimizer, set_dropout_rate

View File

@ -21,26 +21,26 @@ def populate_registry() -> None:
return
# Import all necessary modules
from .util import registry, make_first_longest_spans_filter
# Import all pipeline components that were using registry decorators
from .pipeline.tagger import make_tagger_scorer
from .pipeline.ner import make_ner_scorer
from .pipeline.lemmatizer import make_lemmatizer_scorer
from .pipeline.span_finder import make_span_finder_scorer
from .pipeline.spancat import (
make_spancat_scorer,
build_ngram_suggester,
build_ngram_range_suggester,
build_preset_spans_suggester,
)
from .pipeline.entityruler import (
make_entity_ruler_scorer as make_entityruler_scorer,
)
from .pipeline.lemmatizer import make_lemmatizer_scorer
from .pipeline.ner import make_ner_scorer
from .pipeline.sentencizer import senter_score as make_sentencizer_scorer
from .pipeline.senter import make_senter_scorer
from .pipeline.span_finder import make_span_finder_scorer
from .pipeline.spancat import (
build_ngram_range_suggester,
build_ngram_suggester,
build_preset_spans_suggester,
make_spancat_scorer,
)
# Import all pipeline components that were using registry decorators
from .pipeline.tagger import make_tagger_scorer
from .pipeline.textcat import make_textcat_scorer
from .pipeline.textcat_multilabel import make_textcat_multilabel_scorer
from .util import make_first_longest_spans_filter, registry
# Register miscellaneous components
registry.misc("spacy.first_longest_spans_filter.v1")(
@ -55,14 +55,14 @@ def populate_registry() -> None:
# Import ML components that use registry
from .ml.models.tok2vec import (
tok2vec_listener_v1,
build_hash_embed_cnn_tok2vec,
build_Tok2Vec_model,
MultiHashEmbed,
BiLSTMEncoder,
CharacterEmbed,
MaxoutWindowEncoder,
MishWindowEncoder,
BiLSTMEncoder,
MultiHashEmbed,
build_hash_embed_cnn_tok2vec,
build_Tok2Vec_model,
tok2vec_listener_v1,
)
# Register scorers

View File

@ -1,9 +1,10 @@
# coding: utf-8
"""Test factory import compatibility from original and new locations."""
import pytest
import importlib
import pytest
@pytest.mark.parametrize(
"factory_name,original_module,compat_module",

View File

@ -1,7 +1,9 @@
import json
import inspect
import pytest
import json
from pathlib import Path
import pytest
from spacy.language import Language
from spacy.util import registry
@ -10,6 +12,7 @@ REFERENCE_FILE = Path(__file__).parent / "factory_registrations.json"
# Monkey patch the util.is_same_func to handle Cython functions
import inspect
from spacy import util
original_is_same_func = util.is_same_func

View File

@ -1,7 +1,9 @@
import json
import os
import pytest
from pathlib import Path
import pytest
from spacy.util import registry
# Path to the reference registry contents, relative to this file

View File

@ -135,7 +135,7 @@ class registry(thinc.registry):
@classmethod
def ensure_populated(cls) -> None:
"""Ensure the registry is populated with all necessary components."""
from .registrations import populate_registry, REGISTRY_POPULATED
from .registrations import REGISTRY_POPULATED, populate_registry
if not REGISTRY_POPULATED:
populate_registry()