mirror of
https://github.com/explosion/spaCy.git
synced 2025-07-12 17:22:25 +03:00
Add test for factory registrations
This commit is contained in:
parent
c62b9dac0b
commit
d20445ef6f
132
spacy/tests/factory_registrations.json
Normal file
132
spacy/tests/factory_registrations.json
Normal file
|
@ -0,0 +1,132 @@
|
|||
{
|
||||
"attribute_ruler": {
|
||||
"name": "attribute_ruler",
|
||||
"module": "spacy.pipeline.attributeruler",
|
||||
"function": "make_attribute_ruler"
|
||||
},
|
||||
"beam_ner": {
|
||||
"name": "beam_ner",
|
||||
"module": "spacy.pipeline.ner",
|
||||
"function": "make_beam_ner"
|
||||
},
|
||||
"beam_parser": {
|
||||
"name": "beam_parser",
|
||||
"module": "spacy.pipeline.dep_parser",
|
||||
"function": "make_beam_parser"
|
||||
},
|
||||
"doc_cleaner": {
|
||||
"name": "doc_cleaner",
|
||||
"module": "spacy.pipeline.functions",
|
||||
"function": "make_doc_cleaner"
|
||||
},
|
||||
"entity_linker": {
|
||||
"name": "entity_linker",
|
||||
"module": "spacy.pipeline.entity_linker",
|
||||
"function": "make_entity_linker"
|
||||
},
|
||||
"entity_ruler": {
|
||||
"name": "entity_ruler",
|
||||
"module": "spacy.pipeline.entityruler",
|
||||
"function": "make_entity_ruler"
|
||||
},
|
||||
"future_entity_ruler": {
|
||||
"name": "future_entity_ruler",
|
||||
"module": "spacy.pipeline.span_ruler",
|
||||
"function": "make_entity_ruler"
|
||||
},
|
||||
"lemmatizer": {
|
||||
"name": "lemmatizer",
|
||||
"module": "spacy.pipeline.lemmatizer",
|
||||
"function": "make_lemmatizer"
|
||||
},
|
||||
"merge_entities": {
|
||||
"name": "merge_entities",
|
||||
"module": "spacy.language",
|
||||
"function": "Language.component.<locals>.add_component.<locals>.factory_func"
|
||||
},
|
||||
"merge_noun_chunks": {
|
||||
"name": "merge_noun_chunks",
|
||||
"module": "spacy.language",
|
||||
"function": "Language.component.<locals>.add_component.<locals>.factory_func"
|
||||
},
|
||||
"merge_subtokens": {
|
||||
"name": "merge_subtokens",
|
||||
"module": "spacy.language",
|
||||
"function": "Language.component.<locals>.add_component.<locals>.factory_func"
|
||||
},
|
||||
"morphologizer": {
|
||||
"name": "morphologizer",
|
||||
"module": "spacy.pipeline.morphologizer",
|
||||
"function": "make_morphologizer"
|
||||
},
|
||||
"ner": {
|
||||
"name": "ner",
|
||||
"module": "spacy.pipeline.ner",
|
||||
"function": "make_ner"
|
||||
},
|
||||
"parser": {
|
||||
"name": "parser",
|
||||
"module": "spacy.pipeline.dep_parser",
|
||||
"function": "make_parser"
|
||||
},
|
||||
"sentencizer": {
|
||||
"name": "sentencizer",
|
||||
"module": "spacy.pipeline.sentencizer",
|
||||
"function": "make_sentencizer"
|
||||
},
|
||||
"senter": {
|
||||
"name": "senter",
|
||||
"module": "spacy.pipeline.senter",
|
||||
"function": "make_senter"
|
||||
},
|
||||
"span_finder": {
|
||||
"name": "span_finder",
|
||||
"module": "spacy.pipeline.span_finder",
|
||||
"function": "make_span_finder"
|
||||
},
|
||||
"span_ruler": {
|
||||
"name": "span_ruler",
|
||||
"module": "spacy.pipeline.span_ruler",
|
||||
"function": "make_span_ruler"
|
||||
},
|
||||
"spancat": {
|
||||
"name": "spancat",
|
||||
"module": "spacy.pipeline.spancat",
|
||||
"function": "make_spancat"
|
||||
},
|
||||
"spancat_singlelabel": {
|
||||
"name": "spancat_singlelabel",
|
||||
"module": "spacy.pipeline.spancat",
|
||||
"function": "make_spancat_singlelabel"
|
||||
},
|
||||
"tagger": {
|
||||
"name": "tagger",
|
||||
"module": "spacy.pipeline.tagger",
|
||||
"function": "make_tagger"
|
||||
},
|
||||
"textcat": {
|
||||
"name": "textcat",
|
||||
"module": "spacy.pipeline.textcat",
|
||||
"function": "make_textcat"
|
||||
},
|
||||
"textcat_multilabel": {
|
||||
"name": "textcat_multilabel",
|
||||
"module": "spacy.pipeline.textcat_multilabel",
|
||||
"function": "make_multilabel_textcat"
|
||||
},
|
||||
"tok2vec": {
|
||||
"name": "tok2vec",
|
||||
"module": "spacy.pipeline.tok2vec",
|
||||
"function": "make_tok2vec"
|
||||
},
|
||||
"token_splitter": {
|
||||
"name": "token_splitter",
|
||||
"module": "spacy.pipeline.functions",
|
||||
"function": "make_token_splitter"
|
||||
},
|
||||
"trainable_lemmatizer": {
|
||||
"name": "trainable_lemmatizer",
|
||||
"module": "spacy.pipeline.edit_tree_lemmatizer",
|
||||
"function": "make_edit_tree_lemmatizer"
|
||||
}
|
||||
}
|
76
spacy/tests/test_factory_registrations.py
Normal file
76
spacy/tests/test_factory_registrations.py
Normal file
|
@ -0,0 +1,76 @@
|
|||
import json
|
||||
import inspect
|
||||
import pytest
|
||||
from pathlib import Path
|
||||
from spacy.language import Language
|
||||
from spacy.util import registry
|
||||
|
||||
# Path to the reference factory registrations, relative to this file
|
||||
# The JSON snapshot sits in the same directory as this module, so swap only the final path component.
REFERENCE_FILE = Path(__file__).with_name("factory_registrations.json")
|
||||
|
||||
# Monkey patch the util.is_same_func to handle Cython functions
|
||||
import inspect
|
||||
from spacy import util
|
||||
|
||||
# Hold on to spaCy's own comparison so the wrapper below can delegate to it.
original_is_same_func = util.is_same_func


def patched_is_same_func(func1, func2):
    """Compare two callables, tolerating a TypeError from the stock check.

    The call is forwarded to the ``util.is_same_func`` implementation saved in
    ``original_is_same_func``. If that raises ``TypeError`` (the situation this
    file attributes to Cython functions), the two objects are compared by
    their ``str()`` output instead.

    func1: First callable to compare.
    func2: Second callable to compare.
    RETURNS: The result of the original check, or of the ``str()`` comparison
        when the original check raised ``TypeError``.
    """
    try:
        verdict = original_is_same_func(func1, func2)
    except TypeError:
        # Fallback: identical string representations count as "the same".
        verdict = str(func1) == str(func2)
    return verdict


# Swap the wrapper in on the util module; this happens at import time.
util.is_same_func = patched_is_same_func
|
||||
|
||||
@pytest.fixture
def reference_factory_registrations():
    """Load the reference factory registrations from the JSON file.

    Fails the requesting test through ``pytest.fail`` when ``REFERENCE_FILE``
    does not exist.

    RETURNS: The parsed JSON content of ``REFERENCE_FILE``.
    """
    if not REFERENCE_FILE.exists():
        pytest.fail(f"Reference file {REFERENCE_FILE} not found. Run export_factory_registrations.py first.")

    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default text encoding.
    with REFERENCE_FILE.open("r", encoding="utf8") as f:
        return json.load(f)
|
||||
|
||||
def test_factory_registrations_preserved(reference_factory_registrations):
    """Check that every factory name in the reference file is still registered.

    Only the factory names are compared. The module and function name of each
    current factory are collected into the same shape as the reference
    entries, but they are not asserted on. Factories that exist now but are
    absent from the reference file are printed, not treated as failures.

    reference_factory_registrations: Mapping of factory name to its recorded
        details, as supplied by the fixture of the same name.
    """
    # The registry has to be populated before it can be listed.
    registry.ensure_populated()

    current_registrations = {}
    for name, func in registry.factories.get_all().items():
        try:
            module_name = func.__module__
        except (AttributeError, TypeError):
            # Fallback attributed to Cython functions: take the first dotted
            # component of the second word of the string representation.
            module_name = str(func).split()[1].split(".")[0]

        try:
            func_name = func.__qualname__
        except (AttributeError, TypeError):
            # Fallback attributed to Cython functions: prefer __name__, else
            # take the last dotted component of the string representation.
            if hasattr(func, "__name__"):
                func_name = func.__name__
            else:
                func_name = str(func).split()[1].split(".")[-1]

        current_registrations[name] = {
            "name": name,
            "module": module_name,
            "function": func_name,
        }

    expected_names = set(reference_factory_registrations.keys())
    current_names = set(current_registrations.keys())

    # Anything in the reference but not in the registry is a failure.
    missing = expected_names - current_names
    assert not missing, f"Missing factory registrations: {', '.join(sorted(missing))}"

    # Additions are reported for information only.
    added = current_names - expected_names
    if added:
        print(f"New factory registrations found: {', '.join(sorted(added))}")
|
Loading…
Reference in New Issue
Block a user