Add __getattr__ import hooks for factories for backwards compatibility

This commit is contained in:
Matthew Honnibal 2025-05-21 15:11:03 +02:00
parent d8388aa591
commit 4318f8fc85
19 changed files with 205 additions and 20 deletions

View File

@ -1,5 +1,7 @@
from pathlib import Path
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
import sys
import importlib
import srsly
@ -344,3 +346,11 @@ def _split_morph_attrs(attrs: dict) -> Tuple[dict, dict]:
else:
morph_attrs[k] = v
return other_attrs, morph_attrs
# Setup backwards compatibility hook for factories
def __getattr__(name):
    """Backwards-compatibility hook for the ``make_attribute_ruler`` factory.

    When ``make_attribute_ruler`` is requested from this module, it is looked
    up on ``spacy.registrations``, which is imported only at that moment.

    name (str): The attribute name being looked up on this module.
    RETURNS: The ``make_attribute_ruler`` attribute of ``spacy.registrations``.
    RAISES: AttributeError for every other name.
    """
    if name != "make_attribute_ruler":
        raise AttributeError(f"module {__name__} has no attribute {name}")
    registrations = importlib.import_module("spacy.registrations")
    return getattr(registrations, name)

View File

@ -1,6 +1,8 @@
# cython: infer_types=True, binding=True
from collections import defaultdict
from typing import Callable, Optional
import importlib
import sys
from thinc.api import Config, Model
@ -164,3 +166,14 @@ cdef class DependencyParser(Parser):
# because we instead have a label frequency cut-off and back off rare
# labels to 'dep'.
pass
# Setup backwards compatibility hook for factories
def __getattr__(name):
    """Backwards-compatibility hook for the parser factories.

    ``make_parser`` and ``make_beam_parser`` are looked up on
    ``spacy.registrations``, which is imported only when one of them is
    requested from this module.

    name (str): The attribute name being looked up on this module.
    RETURNS: The same-named attribute of ``spacy.registrations``.
    RAISES: AttributeError for every other name.
    """
    if name not in ("make_parser", "make_beam_parser"):
        raise AttributeError(f"module {__name__} has no attribute {name}")
    registrations = importlib.import_module("spacy.registrations")
    return getattr(registrations, name)

View File

@ -1,5 +1,7 @@
from collections import Counter
from itertools import islice
import importlib
import sys
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, cast
import numpy as np
@ -386,3 +388,11 @@ class EditTreeLemmatizer(TrainablePipe):
self.tree2label[tree_id] = len(self.cfg["labels"])
self.cfg["labels"].append(tree_id)
return self.tree2label[tree_id]
# Setup backwards compatibility hook for factories
def __getattr__(name):
    """Backwards-compatibility hook for ``make_edit_tree_lemmatizer``.

    The factory is looked up on ``spacy.registrations``, which is imported
    only when the name is requested from this module.

    name (str): The attribute name being looked up on this module.
    RETURNS: The ``make_edit_tree_lemmatizer`` attribute of
        ``spacy.registrations``.
    RAISES: AttributeError for every other name.
    """
    if name != "make_edit_tree_lemmatizer":
        raise AttributeError(f"module {__name__} has no attribute {name}")
    registrations = importlib.import_module("spacy.registrations")
    return getattr(registrations, name)

View File

@ -2,6 +2,8 @@ import random
from itertools import islice
from pathlib import Path
from typing import Any, Callable, Dict, Iterable, List, Optional, Union
import sys
import importlib
import srsly
from thinc.api import Config, CosineDistance, Model, Optimizer, set_dropout_rate
@ -572,3 +574,11 @@ class EntityLinker(TrainablePipe):
def add_label(self, label):
    """Not supported here: always raises NotImplementedError.

    label: Ignored.
    """
    raise NotImplementedError
# Setup backwards compatibility hook for factories
def __getattr__(name):
    """Backwards-compatibility hook for the ``make_entity_linker`` factory.

    The factory is looked up on ``spacy.registrations``, which is imported
    only when the name is requested from this module.

    name (str): The attribute name being looked up on this module.
    RETURNS: The ``make_entity_linker`` attribute of ``spacy.registrations``.
    RAISES: AttributeError for every other name.
    """
    if name != "make_entity_linker":
        raise AttributeError(f"module {__name__} has no attribute {name}")
    registrations = importlib.import_module("spacy.registrations")
    return getattr(registrations, name)

View File

@ -1,6 +1,8 @@
import warnings
from collections import defaultdict
from pathlib import Path
import importlib
import sys
from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Tuple, Union
import srsly
@ -500,3 +502,11 @@ class EntityRuler(Pipe):
srsly.write_jsonl(path, self.patterns)
else:
to_disk(path, serializers, {})
# Setup backwards compatibility hook for factories
def __getattr__(name):
    """Backwards-compatibility hook for the ``make_entity_ruler`` factory.

    The factory is looked up on ``spacy.registrations``, which is imported
    only when the name is requested from this module.

    name (str): The attribute name being looked up on this module.
    RETURNS: The ``make_entity_ruler`` attribute of ``spacy.registrations``.
    RAISES: AttributeError for every other name.
    """
    if name != "make_entity_ruler":
        raise AttributeError(f"module {__name__} has no attribute {name}")
    registrations = importlib.import_module("spacy.registrations")
    return getattr(registrations, name)

View File

@ -1,5 +1,7 @@
import warnings
from typing import Any, Dict
import sys
import importlib
import srsly
@ -73,8 +75,6 @@ def merge_subtokens(doc: Doc, label: str = "subtok") -> Doc:
return doc
class TokenSplitter:
def __init__(self, min_length: int = 0, split_length: int = 0):
self.min_length = min_length
@ -132,8 +132,6 @@ class TokenSplitter:
util.from_disk(path, serializers, [])
class DocCleaner:
def __init__(self, attrs: Dict[str, Any], *, silent: bool = True):
self.cfg: Dict[str, Any] = {"attrs": dict(attrs), "silent": silent}
@ -186,3 +184,14 @@ class DocCleaner:
"cfg": lambda p: self.cfg.update(srsly.read_json(p)),
}
util.from_disk(path, serializers, [])
# Setup backwards compatibility hook for factories
def __getattr__(name):
    """Backwards-compatibility hook for the function-component factories.

    ``make_doc_cleaner`` and ``make_token_splitter`` are looked up on
    ``spacy.registrations``, which is imported only when one of them is
    requested from this module.

    name (str): The attribute name being looked up on this module.
    RETURNS: The same-named attribute of ``spacy.registrations``.
    RAISES: AttributeError for every other name.
    """
    if name not in ("make_doc_cleaner", "make_token_splitter"):
        raise AttributeError(f"module {__name__} has no attribute {name}")
    registrations = importlib.import_module("spacy.registrations")
    return getattr(registrations, name)

View File

@ -1,5 +1,7 @@
import warnings
from pathlib import Path
import importlib
import sys
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
from thinc.api import Model
@ -311,3 +313,11 @@ class Lemmatizer(Pipe):
util.from_bytes(bytes_data, deserialize, exclude)
self._validate_tables()
return self
# Setup backwards compatibility hook for factories
def __getattr__(name):
    """Backwards-compatibility hook for the ``make_lemmatizer`` factory.

    The factory is looked up on ``spacy.registrations``, which is imported
    only when the name is requested from this module.

    name (str): The attribute name being looked up on this module.
    RETURNS: The ``make_lemmatizer`` attribute of ``spacy.registrations``.
    RAISES: AttributeError for every other name.
    """
    if name != "make_lemmatizer":
        raise AttributeError(f"module {__name__} has no attribute {name}")
    registrations = importlib.import_module("spacy.registrations")
    return getattr(registrations, name)

View File

@ -1,6 +1,8 @@
# cython: infer_types=True, binding=True
from itertools import islice
from typing import Callable, Dict, Optional, Union
import sys
import importlib
from thinc.api import Config, Model, SequenceCategoricalCrossentropy
@ -290,3 +292,11 @@ class Morphologizer(Tagger):
if self.model.ops.xp.isnan(loss):
raise ValueError(Errors.E910.format(name=self.name))
return float(loss), d_scores
# Setup backwards compatibility hook for factories
def __getattr__(name):
    """Backwards-compatibility hook for the ``make_morphologizer`` factory.

    The factory is looked up on ``spacy.registrations``, which is imported
    only when the name is requested from this module.

    name (str): The attribute name being looked up on this module.
    RETURNS: The ``make_morphologizer`` attribute of ``spacy.registrations``.
    RAISES: AttributeError for every other name.
    """
    if name != "make_morphologizer":
        raise AttributeError(f"module {__name__} has no attribute {name}")
    registrations = importlib.import_module("spacy.registrations")
    return getattr(registrations, name)

View File

@ -1,5 +1,7 @@
# cython: infer_types=True, binding=True
from typing import Optional
import sys
import importlib
import numpy
from thinc.api import Config, CosineDistance, Model, set_dropout_rate, to_categorical
@ -205,3 +207,11 @@ class ClozeMultitask(TrainablePipe):
def add_label(self, label):
    """Not supported here: always raises NotImplementedError.

    label: Ignored.
    """
    raise NotImplementedError
# Setup backwards compatibility hook for factories
def __getattr__(name):
    """Backwards-compatibility hook for the ``make_nn_labeller`` factory.

    The factory is looked up on ``spacy.registrations``, which is imported
    only when the name is requested from this module.

    name (str): The attribute name being looked up on this module.
    RETURNS: The ``make_nn_labeller`` attribute of ``spacy.registrations``.
    RAISES: AttributeError for every other name.
    """
    if name != "make_nn_labeller":
        raise AttributeError(f"module {__name__} has no attribute {name}")
    registrations = importlib.import_module("spacy.registrations")
    return getattr(registrations, name)

View File

@ -1,6 +1,8 @@
# cython: infer_types=True, binding=True
from collections import defaultdict
from typing import Callable, Optional
import importlib
import sys
from thinc.api import Config, Model
@ -117,3 +119,14 @@ cdef class EntityRecognizer(Parser):
score_dict[(start, end, label)] += score
entity_scores.append(score_dict)
return entity_scores
# Setup backwards compatibility hook for factories
def __getattr__(name):
    """Backwards-compatibility hook for the NER factories.

    ``make_ner`` and ``make_beam_ner`` are looked up on
    ``spacy.registrations``, which is imported only when one of them is
    requested from this module.

    name (str): The attribute name being looked up on this module.
    RETURNS: The same-named attribute of ``spacy.registrations``.
    RAISES: AttributeError for every other name.
    """
    if name not in ("make_ner", "make_beam_ner"):
        raise AttributeError(f"module {__name__} has no attribute {name}")
    registrations = importlib.import_module("spacy.registrations")
    return getattr(registrations, name)

View File

@ -1,5 +1,7 @@
# cython: infer_types=True, binding=True
from typing import Callable, List, Optional
import importlib
import sys
import srsly
@ -14,16 +16,6 @@ from .senter import senter_score
BACKWARD_OVERWRITE = False
def make_sentencizer(
    nlp: Language,
    name: str,
    punct_chars: Optional[List[str]],
    overwrite: bool,
    scorer: Optional[Callable],
):
    """Construct a ``Sentencizer`` from the factory arguments.

    nlp (Language): Accepted as part of the signature; not used in this body.
    name (str): Passed to ``Sentencizer`` as its first positional argument.
    punct_chars (Optional[List[str]]): Forwarded as ``punct_chars``.
    overwrite (bool): Forwarded as ``overwrite``.
    scorer (Optional[Callable]): Forwarded as ``scorer``.
    RETURNS: The newly constructed ``Sentencizer``.
    """
    settings = {
        "punct_chars": punct_chars,
        "overwrite": overwrite,
        "scorer": scorer,
    }
    return Sentencizer(name, **settings)
class Sentencizer(Pipe):
"""Segment the Doc into sentences using a rule-based strategy.
@ -175,3 +167,11 @@ class Sentencizer(Pipe):
self.punct_chars = set(cfg.get("punct_chars", self.default_punct_chars))
self.overwrite = cfg.get("overwrite", self.overwrite)
return self
# Setup backwards compatibility hook for factories
def __getattr__(name):
    """Backwards-compatibility hook for the ``make_sentencizer`` factory.

    The factory is looked up on ``spacy.registrations``, which is imported
    only when the name is requested from this module.

    name (str): The attribute name being looked up on this module.
    RETURNS: The ``make_sentencizer`` attribute of ``spacy.registrations``.
    RAISES: AttributeError for every other name.
    """
    if name != "make_sentencizer":
        raise AttributeError(f"module {__name__} has no attribute {name}")
    registrations = importlib.import_module("spacy.registrations")
    return getattr(registrations, name)

View File

@ -1,6 +1,8 @@
# cython: infer_types=True, binding=True
from itertools import islice
from typing import Callable, Optional
import sys
import importlib
from thinc.api import Config, Model, SequenceCategoricalCrossentropy
@ -174,3 +176,11 @@ class SentenceRecognizer(Tagger):
def add_label(self, label, values=None):
    """Not supported here: always raises NotImplementedError.

    label: Ignored.
    values: Ignored.
    """
    raise NotImplementedError
# Setup backwards compatibility hook for factories
def __getattr__(name):
    """Backwards-compatibility hook for the ``make_senter`` factory.

    The factory is looked up on ``spacy.registrations``, which is imported
    only when the name is requested from this module.

    name (str): The attribute name being looked up on this module.
    RETURNS: The ``make_senter`` attribute of ``spacy.registrations``.
    RAISES: AttributeError for every other name.
    """
    if name != "make_senter":
        raise AttributeError(f"module {__name__} has no attribute {name}")
    registrations = importlib.import_module("spacy.registrations")
    return getattr(registrations, name)

View File

@ -1,4 +1,6 @@
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple
import sys
import importlib
from thinc.api import Config, Model, Optimizer, set_dropout_rate
from thinc.types import Floats2d
@ -41,8 +43,6 @@ depth = 4
DEFAULT_SPAN_FINDER_MODEL = Config().from_str(span_finder_default_config)["model"]
def make_span_finder_scorer():
    """Return the ``span_finder_score`` callable itself (it is not called here)."""
    scorer = span_finder_score
    return scorer
@ -278,3 +278,11 @@ class SpanFinder(TrainablePipe):
self.model.initialize(X=docs, Y=Y)
else:
self.model.initialize()
# Setup backwards compatibility hook for factories
def __getattr__(name):
    """Backwards-compatibility hook for the ``make_span_finder`` factory.

    The factory is looked up on ``spacy.registrations``, which is imported
    only when the name is requested from this module.

    name (str): The attribute name being looked up on this module.
    RETURNS: The ``make_span_finder`` attribute of ``spacy.registrations``.
    RAISES: AttributeError for every other name.
    """
    if name != "make_span_finder":
        raise AttributeError(f"module {__name__} has no attribute {name}")
    registrations = importlib.import_module("spacy.registrations")
    return getattr(registrations, name)

View File

@ -1,6 +1,8 @@
import warnings
from functools import partial
from pathlib import Path
import importlib
import sys
from typing import (
Any,
Callable,
@ -500,3 +502,14 @@ class SpanRuler(Pipe):
"patterns": lambda p: srsly.write_jsonl(p, self.patterns),
}
util.to_disk(path, serializers, {})
# Setup backwards compatibility hook for factories
def __getattr__(name):
    """Backwards-compatibility hook for the span ruler factories.

    ``make_span_ruler`` and ``make_future_span_ruler`` are looked up on
    ``spacy.registrations``, which is imported only when one of them is
    requested from this module.

    name (str): The attribute name being looked up on this module.
    RETURNS: The same-named attribute of ``spacy.registrations``.
    RAISES: AttributeError for every other name.
    """
    if name not in ("make_span_ruler", "make_future_span_ruler"):
        raise AttributeError(f"module {__name__} has no attribute {name}")
    registrations = importlib.import_module("spacy.registrations")
    return getattr(registrations, name)

View File

@ -1,5 +1,7 @@
from dataclasses import dataclass
from functools import partial
import importlib
import sys
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union, cast
import numpy
@ -655,3 +657,14 @@ class SpanCategorizer(TrainablePipe):
spans.attrs["scores"] = numpy.array(attrs_scores)
return spans
# Setup backwards compatibility hook for factories
def __getattr__(name):
    """Backwards-compatibility hook for the span categorizer factories.

    ``make_spancat`` and ``make_spancat_singlelabel`` are looked up on
    ``spacy.registrations``, which is imported only when one of them is
    requested from this module.

    name (str): The attribute name being looked up on this module.
    RETURNS: The same-named attribute of ``spacy.registrations``.
    RAISES: AttributeError for every other name.
    """
    if name not in ("make_spancat", "make_spancat_singlelabel"):
        raise AttributeError(f"module {__name__} has no attribute {name}")
    registrations = importlib.import_module("spacy.registrations")
    return getattr(registrations, name)

View File

@ -1,6 +1,8 @@
# cython: infer_types=True, binding=True
from itertools import islice
from typing import Callable, Optional
import importlib
import sys
import numpy
from thinc.api import Config, Model, SequenceCategoricalCrossentropy, set_dropout_rate
@ -291,3 +293,11 @@ class Tagger(TrainablePipe):
self.cfg["labels"].append(label)
self.vocab.strings.add(label)
return 1
# Setup backwards compatibility hook for factories
def __getattr__(name):
    """Backwards-compatibility hook for the ``make_tagger`` factory.

    The factory is looked up on ``spacy.registrations``, which is imported
    only when the name is requested from this module.

    name (str): The attribute name being looked up on this module.
    RETURNS: The ``make_tagger`` attribute of ``spacy.registrations``.
    RAISES: AttributeError for every other name.
    """
    if name != "make_tagger":
        raise AttributeError(f"module {__name__} has no attribute {name}")
    registrations = importlib.import_module("spacy.registrations")
    return getattr(registrations, name)

View File

@ -4,6 +4,8 @@ from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple
import numpy
from thinc.api import Config, Model, Optimizer, get_array_module, set_dropout_rate
from thinc.types import Floats2d
import sys
import importlib
from ..errors import Errors
from ..language import Language
@ -74,8 +76,6 @@ subword_features = true
"""
def textcat_score(examples: Iterable[Example], **kwargs) -> Dict[str, Any]:
return Scorer.score_cats(
examples,
@ -373,3 +373,11 @@ class TextCategorizer(TrainablePipe):
for val in vals:
if not (val == 1.0 or val == 0.0):
raise ValueError(Errors.E851.format(val=val))
# Setup backwards compatibility hook for factories
def __getattr__(name):
    """Backwards-compatibility hook for the ``make_textcat`` factory.

    The factory is looked up on ``spacy.registrations``, which is imported
    only when the name is requested from this module.

    name (str): The attribute name being looked up on this module.
    RETURNS: The ``make_textcat`` attribute of ``spacy.registrations``.
    RAISES: AttributeError for every other name.
    """
    if name != "make_textcat":
        raise AttributeError(f"module {__name__} has no attribute {name}")
    registrations = importlib.import_module("spacy.registrations")
    return getattr(registrations, name)

View File

@ -1,5 +1,7 @@
from itertools import islice
from typing import Any, Callable, Dict, Iterable, List, Optional
import sys
import importlib
from thinc.api import Config, Model
from thinc.types import Floats2d
@ -72,8 +74,6 @@ subword_features = true
"""
def textcat_multilabel_score(examples: Iterable[Example], **kwargs) -> Dict[str, Any]:
return Scorer.score_cats(
examples,
@ -170,3 +170,11 @@ class MultiLabel_TextCategorizer(TextCategorizer):
for val in ex.reference.cats.values():
if not (val == 1.0 or val == 0.0):
raise ValueError(Errors.E851.format(val=val))
# Setup backwards compatibility hook for factories
def __getattr__(name):
    """Backwards-compatibility hook for ``make_multilabel_textcat``.

    The factory is looked up on ``spacy.registrations``, which is imported
    only when the name is requested from this module.

    name (str): The attribute name being looked up on this module.
    RETURNS: The ``make_multilabel_textcat`` attribute of
        ``spacy.registrations``.
    RAISES: AttributeError for every other name.
    """
    if name != "make_multilabel_textcat":
        raise AttributeError(f"module {__name__} has no attribute {name}")
    registrations = importlib.import_module("spacy.registrations")
    return getattr(registrations, name)

View File

@ -1,4 +1,6 @@
from itertools import islice
import importlib
import sys
from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence
from thinc.api import Config, Model, Optimizer, set_dropout_rate
@ -315,3 +317,11 @@ def forward(model: Tok2VecListener, inputs, is_train: bool):
def _empty_backprop(dX):  # for pickling
    """Ignore ``dX`` and return a new empty list."""
    return []
# Setup backwards compatibility hook for factories
def __getattr__(name):
    """Backwards-compatibility hook for the ``make_tok2vec`` factory.

    The factory is looked up on ``spacy.registrations``, which is imported
    only when the name is requested from this module.

    name (str): The attribute name being looked up on this module.
    RETURNS: The ``make_tok2vec`` attribute of ``spacy.registrations``.
    RAISES: AttributeError for every other name.
    """
    if name != "make_tok2vec":
        raise AttributeError(f"module {__name__} has no attribute {name}")
    registrations = importlib.import_module("spacy.registrations")
    return getattr(registrations, name)