From 0630d62264525b29648f2d4e155ee090f6789a8e Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Wed, 21 May 2025 20:46:41 +0200 Subject: [PATCH] Format --- spacy/pipeline/attributeruler.py | 2 - spacy/pipeline/edit_tree_lemmatizer.py | 2 - spacy/pipeline/entity_linker.py | 2 - spacy/pipeline/entityruler.py | 4 +- spacy/pipeline/lemmatizer.py | 2 - spacy/pipeline/span_ruler.py | 4 -- spacy/pipeline/spancat.py | 6 +-- spacy/pipeline/tok2vec.py | 2 - spacy/tests/test_factory_registrations.py | 48 ++++++++++++++++------- spacy/tests/test_registry_population.py | 17 +++++--- spacy/util.py | 1 + 11 files changed, 47 insertions(+), 43 deletions(-) diff --git a/spacy/pipeline/attributeruler.py b/spacy/pipeline/attributeruler.py index eb9c61d20..cd84738fa 100644 --- a/spacy/pipeline/attributeruler.py +++ b/spacy/pipeline/attributeruler.py @@ -24,8 +24,6 @@ TagMapType = Dict[str, Dict[Union[int, str], Union[int, str]]] MorphRulesType = Dict[str, Dict[str, Dict[Union[int, str], Union[int, str]]]] - - def attribute_ruler_score(examples: Iterable[Example], **kwargs) -> Dict[str, Any]: def morph_key_getter(token, attr): return getattr(token, attr).key diff --git a/spacy/pipeline/edit_tree_lemmatizer.py b/spacy/pipeline/edit_tree_lemmatizer.py index b4be480ba..2ce5ad359 100644 --- a/spacy/pipeline/edit_tree_lemmatizer.py +++ b/spacy/pipeline/edit_tree_lemmatizer.py @@ -41,8 +41,6 @@ subword_features = true DEFAULT_EDIT_TREE_LEMMATIZER_MODEL = Config().from_str(default_model_config)["model"] - - class EditTreeLemmatizer(TrainablePipe): """ Lemmatizer that lemmatizes each word using a predicted edit tree. diff --git a/spacy/pipeline/entity_linker.py b/spacy/pipeline/entity_linker.py index 260b63925..d78aa104a 100644 --- a/spacy/pipeline/entity_linker.py +++ b/spacy/pipeline/entity_linker.py @@ -42,8 +42,6 @@ subword_features = true DEFAULT_NEL_MODEL = Config().from_str(default_model_config)["model"] - - def entity_linker_score(examples, **kwargs): return Scorer.score_links(examples, negative_labels=[EntityLinker.NIL], **kwargs) diff --git a/spacy/pipeline/entityruler.py b/spacy/pipeline/entityruler.py index 5c2d9d2cb..b904b166c 100644 --- a/spacy/pipeline/entityruler.py +++ b/spacy/pipeline/entityruler.py @@ -21,8 +21,6 @@ DEFAULT_ENT_ID_SEP = "||" PatternType = Dict[str, Union[str, List[Dict[str, Any]]]] - - def entity_ruler_score(examples, **kwargs): return get_ner_prf(examples) @@ -509,4 +507,4 @@ def __getattr__(name): if name == "make_entity_ruler": module = importlib.import_module("spacy.pipeline.factories") return module.make_entity_ruler - raise AttributeError(f"module {__name__} has no attribute {name}") \ No newline at end of file + raise AttributeError(f"module {__name__} has no attribute {name}") diff --git a/spacy/pipeline/lemmatizer.py b/spacy/pipeline/lemmatizer.py index 3c054314c..abb7e49e8 100644 --- a/spacy/pipeline/lemmatizer.py +++ b/spacy/pipeline/lemmatizer.py @@ -18,8 +18,6 @@ from ..vocab import Vocab from .pipe import Pipe - - def lemmatizer_score(examples: Iterable[Example], **kwargs) -> Dict[str, Any]: return Scorer.score_token_attr(examples, "lemma", **kwargs) diff --git a/spacy/pipeline/span_ruler.py b/spacy/pipeline/span_ruler.py index 43dc95509..efebdc248 100644 --- a/spacy/pipeline/span_ruler.py +++ b/spacy/pipeline/span_ruler.py @@ -34,10 +34,6 @@ PatternType = Dict[str, Union[str, List[Dict[str, Any]]]] DEFAULT_SPANS_KEY = "ruler" - - - - def prioritize_new_ents_filter( entities: Iterable[Span], spans: Iterable[Span] ) -> List[Span]: diff --git a/spacy/pipeline/spancat.py b/spacy/pipeline/spancat.py index 32c286bc5..143f97667 100644 --- a/spacy/pipeline/spancat.py +++ b/spacy/pipeline/spancat.py @@ -159,10 +159,6 @@ def build_preset_spans_suggester(spans_key: str) -> Suggester: return partial(preset_spans_suggester, spans_key=spans_key) - - - - def spancat_score(examples: Iterable[Example], **kwargs) -> Dict[str, Any]: kwargs = dict(kwargs) attr_prefix = "spans_" @@ -667,4 +663,4 @@ def __getattr__(name): elif name == "make_spancat_singlelabel": module = importlib.import_module("spacy.pipeline.factories") return module.make_spancat_singlelabel - raise AttributeError(f"module {__name__} has no attribute {name}") \ No newline at end of file + raise AttributeError(f"module {__name__} has no attribute {name}") diff --git a/spacy/pipeline/tok2vec.py b/spacy/pipeline/tok2vec.py index 4aab5f934..e9d959869 100644 --- a/spacy/pipeline/tok2vec.py +++ b/spacy/pipeline/tok2vec.py @@ -26,8 +26,6 @@ subword_features = true DEFAULT_TOK2VEC_MODEL = Config().from_str(default_model_config)["model"] - - class Tok2Vec(TrainablePipe): """Apply a "token-to-vector" model and set its outputs in the doc.tensor attribute. This is mostly useful to share a single subnetwork between multiple diff --git a/spacy/tests/test_factory_registrations.py b/spacy/tests/test_factory_registrations.py index 7dbcc81a5..cee75360a 100644 --- a/spacy/tests/test_factory_registrations.py +++ b/spacy/tests/test_factory_registrations.py @@ -14,6 +14,7 @@ from spacy import util original_is_same_func = util.is_same_func + def patched_is_same_func(func1, func2): # Handle Cython functions try: @@ -22,28 +23,33 @@ def patched_is_same_func(func1, func2): # For Cython functions, just compare the string representation return str(func1) == str(func2) + util.is_same_func = patched_is_same_func + @pytest.fixture def reference_factory_registrations(): """Load reference factory registrations from JSON file""" if not REFERENCE_FILE.exists(): - pytest.fail(f"Reference file {REFERENCE_FILE} not found. Run export_factory_registrations.py first.") - + pytest.fail( + f"Reference file {REFERENCE_FILE} not found. Run export_factory_registrations.py first." + ) + with REFERENCE_FILE.open("r") as f: return json.load(f) + def test_factory_registrations_preserved(reference_factory_registrations): """Test that all factory registrations from the reference file are still present.""" # Ensure the registry is populated registry.ensure_populated() - + # Get all factory registrations all_factories = registry.factories.get_all() - + # Initialize our data structure to store current factory registrations current_registrations = {} - + # Process factory registrations for name, func in all_factories.items(): # Store information about each factory @@ -51,26 +57,38 @@ def test_factory_registrations_preserved(reference_factory_registrations): module_name = func.__module__ except (AttributeError, TypeError): # For Cython functions, just use a placeholder - module_name = str(func).split()[1].split('.')[0] - + module_name = str(func).split()[1].split(".")[0] + try: func_name = func.__qualname__ except (AttributeError, TypeError): # For Cython functions, use the function's name - func_name = func.__name__ if hasattr(func, "__name__") else str(func).split()[1].split('.')[-1] - + func_name = ( + func.__name__ + if hasattr(func, "__name__") + else str(func).split()[1].split(".")[-1] + ) + current_registrations[name] = { "name": name, "module": module_name, "function": func_name, } - + # Check for missing registrations - missing_registrations = set(reference_factory_registrations.keys()) - set(current_registrations.keys()) - assert not missing_registrations, f"Missing factory registrations: {', '.join(sorted(missing_registrations))}" - + missing_registrations = set(reference_factory_registrations.keys()) - set( + current_registrations.keys() + ) + assert ( + not missing_registrations + ), f"Missing factory registrations: {', '.join(sorted(missing_registrations))}" + # Check for new registrations (not an error, but informative) - new_registrations = set(current_registrations.keys()) - set(reference_factory_registrations.keys()) + new_registrations = set(current_registrations.keys()) - set( + reference_factory_registrations.keys() + ) if new_registrations: # This is not an error, just informative - print(f"New factory registrations found: {', '.join(sorted(new_registrations))}") \ No newline at end of file + print( + f"New factory registrations found: {', '.join(sorted(new_registrations))}" + ) diff --git a/spacy/tests/test_registry_population.py b/spacy/tests/test_registry_population.py index 732e57a0d..fb5ce6850 100644 --- a/spacy/tests/test_registry_population.py +++ b/spacy/tests/test_registry_population.py @@ -7,25 +7,28 @@ from spacy.util import registry # Path to the reference registry contents, relative to this file REFERENCE_FILE = Path(__file__).parent / "registry_contents.json" + @pytest.fixture def reference_registry(): """Load reference registry contents from JSON file""" if not REFERENCE_FILE.exists(): pytest.fail(f"Reference file {REFERENCE_FILE} not found.") - + with REFERENCE_FILE.open("r") as f: return json.load(f) + def test_registry_types(reference_registry): """Test that all registry types match the reference""" # Get current registry types current_registry_types = set(registry.get_registry_names()) expected_registry_types = set(reference_registry.keys()) - + # Check for missing registry types missing_types = expected_registry_types - current_registry_types assert not missing_types, f"Missing registry types: {', '.join(missing_types)}" + def test_registry_entries(reference_registry): """Test that all registry entries are present""" # Check each registry's entries @@ -33,16 +36,18 @@ def test_registry_entries(reference_registry): # Skip if this registry type doesn't exist if not hasattr(registry, registry_name): pytest.fail(f"Registry '{registry_name}' does not exist.") - + # Get current entries reg = getattr(registry, registry_name) current_entries = sorted(list(reg.get_all().keys())) - + # Compare entries expected_set = set(expected_entries) current_set = set(current_entries) - + # Check for missing entries - these would indicate our new registry population # mechanism is missing something missing_entries = expected_set - current_set - assert not missing_entries, f"Registry '{registry_name}' missing entries: {', '.join(missing_entries)}" \ No newline at end of file + assert ( + not missing_entries + ), f"Registry '{registry_name}' missing entries: {', '.join(missing_entries)}" diff --git a/spacy/util.py b/spacy/util.py index 96b52e21d..6ff65eb8e 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -136,6 +136,7 @@ class registry(thinc.registry): def ensure_populated(cls) -> None: """Ensure the registry is populated with all necessary components.""" from .registrations import populate_registry, REGISTRY_POPULATED + if not REGISTRY_POPULATED: populate_registry()