Remove 'sgd' arg from component initialize

2025-07-25 15:39:46 +03:00 · 2020-09-29 11:42:35 +02:00 · 2020-09-29 11:42:35 +02:00 · b3b6868639
commit b3b6868639
parent 5276db6f3f
7 changed files with 11 additions and 29 deletions
--- a/spacy/pipeline/morphologizer.pyx
+++ b/spacy/pipeline/morphologizer.pyx
@@ -129,7 +129,7 @@ class Morphologizer(Tagger):
            self.cfg["labels_pos"][norm_label] = POS_IDS[pos]
        return 1

-    def initialize(self, get_examples, *, pipeline=None, sgd=None):
+    def initialize(self, get_examples, *, pipeline=None):
        """Initialize the pipe for training, using a representative set
        of data examples.

@@ -138,8 +138,6 @@ class Morphologizer(Tagger):
        pipeline (List[Tuple[str, Callable]]): Optional list of pipeline
            components that this component is part of. Corresponds to
            nlp.pipeline.
-        sgd (thinc.api.Optimizer): Optional optimizer. Will be created with
-            create_optimizer if it doesn't exist.
        RETURNS (thinc.api.Optimizer): The optimizer.

        DOCS: https://nightly.spacy.io/api/morphologizer#initialize
@@ -178,9 +176,6 @@ class Morphologizer(Tagger):
        assert len(doc_sample) > 0, Errors.E923.format(name=self.name)
        assert len(label_sample) > 0, Errors.E923.format(name=self.name)
        self.model.initialize(X=doc_sample, Y=label_sample)
-        if sgd is None:
-            sgd = self.create_optimizer()
-        return sgd

    def set_annotations(self, docs, batch_tag_ids):
        """Modify a batch of documents, using pre-computed scores.
--- a/spacy/pipeline/multitask.pyx
+++ b/spacy/pipeline/multitask.pyx
@@ -81,7 +81,7 @@ class MultitaskObjective(Tagger):
    def set_annotations(self, docs, dep_ids):
        pass

-    def initialize(self, get_examples, pipeline=None, sgd=None):
+    def initialize(self, get_examples, pipeline=None):
        if not hasattr(get_examples, "__call__"):
            err = Errors.E930.format(name="MultitaskObjective", obj=type(get_examples))
            raise ValueError(err)
@@ -177,7 +177,7 @@ class ClozeMultitask(Pipe):
    def set_annotations(self, docs, dep_ids):
        pass

-    def initialize(self, get_examples, pipeline=None, sgd=None):
+    def initialize(self, get_examples, pipeline=None):
        self.model.initialize()  # TODO: fix initialization by defining X and Y
        X = self.model.ops.alloc((5, self.model.get_ref("tok2vec").get_dim("nO")))
        self.model.output_layer.initialize(X)
--- a/spacy/pipeline/pipe.pyx
+++ b/spacy/pipeline/pipe.pyx
@@ -183,7 +183,7 @@ cdef class Pipe:
        """
        return util.create_default_optimizer()

-    def initialize(self, get_examples, *, pipeline=None, sgd=None):
+    def initialize(self, get_examples, *, pipeline=None):
        """Initialize the pipe for training, using data examples if available.
        This method needs to be implemented by each Pipe component,
        ensuring the internal model (if available) is initialized properly
@@ -194,8 +194,6 @@ cdef class Pipe:
        pipeline (List[Tuple[str, Callable]]): Optional list of pipeline
            components that this component is part of. Corresponds to
            nlp.pipeline.
-        sgd (thinc.api.Optimizer): Optional optimizer. Will be created with
-            create_optimizer if it doesn't exist.
        RETURNS (thinc.api.Optimizer): The optimizer.

        DOCS: https://nightly.spacy.io/api/pipe#initialize
--- a/spacy/pipeline/senter.pyx
+++ b/spacy/pipeline/senter.pyx
@@ -124,7 +124,7 @@ class SentenceRecognizer(Tagger):
            raise ValueError("nan value when computing loss")
        return float(loss), d_scores

-    def initialize(self, get_examples, *, pipeline=None, sgd=None):
+    def initialize(self, get_examples, *, pipeline=None):
        """Initialize the pipe for training, using a representative set
        of data examples.

@@ -133,9 +133,7 @@ class SentenceRecognizer(Tagger):
        pipeline (List[Tuple[str, Callable]]): Optional list of pipeline
            components that this component is part of. Corresponds to
            nlp.pipeline.
-        sgd (thinc.api.Optimizer): Optional optimizer. Will be created with
-            create_optimizer if it doesn't exist.
-        RETURNS (thinc.api.Optimizer): The optimizer.
+        RETURNS: None

        DOCS: https://nightly.spacy.io/api/sentencerecognizer#initialize
        """
--- a/spacy/pipeline/tagger.pyx
+++ b/spacy/pipeline/tagger.pyx
@@ -256,7 +256,7 @@ class Tagger(Pipe):
            raise ValueError("nan value when computing loss")
        return float(loss), d_scores

-    def initialize(self, get_examples, *, pipeline=None, sgd=None):
+    def initialize(self, get_examples, *, pipeline=None):
        """Initialize the pipe for training, using a representative set
        of data examples.

@@ -265,8 +265,6 @@ class Tagger(Pipe):
        pipeline (List[Tuple[str, Callable]]): Optional list of pipeline
            components that this component is part of. Corresponds to
            nlp.pipeline.
-        sgd (thinc.api.Optimizer): Optional optimizer. Will be created with
-            create_optimizer if it doesn't exist.
        RETURNS (thinc.api.Optimizer): The optimizer.

        DOCS: https://nightly.spacy.io/api/tagger#initialize
@@ -289,9 +287,6 @@ class Tagger(Pipe):
        assert len(doc_sample) > 0, Errors.E923.format(name=self.name)
        assert len(label_sample) > 0, Errors.E923.format(name=self.name)
        self.model.initialize(X=doc_sample, Y=label_sample)
-        if sgd is None:
-            sgd = self.create_optimizer()
-        return sgd

    def add_label(self, label):
        """Add a new label to the pipe.
--- a/spacy/pipeline/textcat.py
+++ b/spacy/pipeline/textcat.py
@@ -338,8 +338,7 @@ class TextCategorizer(Pipe):
        self,
        get_examples: Callable[[], Iterable[Example]],
        *,
-        pipeline: Optional[List[Tuple[str, Callable[[Doc], Doc]]]] = None,
-        sgd: Optional[Optimizer] = None,
+        pipeline: Optional[List[Tuple[str, Callable[[Doc], Doc]]]] = None
    ) -> Optimizer:
        """Initialize the pipe for training, using a representative set
        of data examples.
--- a/spacy/pipeline/transition_parser.pyx
+++ b/spacy/pipeline/transition_parser.pyx
@@ -354,7 +354,7 @@ cdef class Parser(Pipe):
            # If all weights for an output are 0 in the original model, don't
            # supervise that output. This allows us to add classes.
            loss += (d_scores**2).sum()
-            backprop(d_scores, sgd=sgd)
+            backprop(d_scores)
            # Follow the predicted action
            self.transition_states(states, guesses)
            states = [state for state in states if not state.is_final()]
@@ -405,9 +405,8 @@ cdef class Parser(Pipe):
    def set_output(self, nO):
        self.model.attrs["resize_output"](self.model, nO)

-    def initialize(self, get_examples, pipeline=None, sgd=None, **kwargs):
+    def initialize(self, get_examples, pipeline=None, settings=None):
        self._ensure_examples(get_examples)
-        self.cfg.update(kwargs)
        lexeme_norms = self.vocab.lookups.get_table("lexeme_norm", {})
        if len(lexeme_norms) == 0 and self.vocab.lang in util.LEXEME_NORM_LANGS:
            langs = ", ".join(util.LEXEME_NORM_LANGS)
@@ -425,8 +424,6 @@ cdef class Parser(Pipe):
        self.moves.initialize_actions(actions)
        # make sure we resize so we have an appropriate upper layer
        self._resize()
-        if sgd is None:
-            sgd = self.create_optimizer()
        doc_sample = []
        if pipeline is not None:
            for name, component in pipeline:
@@ -442,7 +439,7 @@ cdef class Parser(Pipe):
        assert len(doc_sample) > 0, Errors.E923.format(name=self.name)
        self.model.initialize(doc_sample)
        if pipeline is not None:
-            self.init_multitask_objectives(get_examples, pipeline, sgd=sgd, **self.cfg)
+            self.init_multitask_objectives(get_examples, pipeline)
        return sgd

    def to_disk(self, path, exclude=tuple()):