Remove 'sgd' arg from component initialize

This commit is contained in:
Matthew Honnibal 2020-09-29 11:42:35 +02:00
parent 5276db6f3f
commit b3b6868639
7 changed files with 11 additions and 29 deletions

View File

@ -129,7 +129,7 @@ class Morphologizer(Tagger):
self.cfg["labels_pos"][norm_label] = POS_IDS[pos]
return 1
def initialize(self, get_examples, *, pipeline=None, sgd=None):
def initialize(self, get_examples, *, pipeline=None):
"""Initialize the pipe for training, using a representative set
of data examples.
@ -138,8 +138,6 @@ class Morphologizer(Tagger):
pipeline (List[Tuple[str, Callable]]): Optional list of pipeline
components that this component is part of. Corresponds to
nlp.pipeline.
sgd (thinc.api.Optimizer): Optional optimizer. Will be created with
create_optimizer if it doesn't exist.
RETURNS (thinc.api.Optimizer): The optimizer.
DOCS: https://nightly.spacy.io/api/morphologizer#initialize
@ -178,9 +176,6 @@ class Morphologizer(Tagger):
assert len(doc_sample) > 0, Errors.E923.format(name=self.name)
assert len(label_sample) > 0, Errors.E923.format(name=self.name)
self.model.initialize(X=doc_sample, Y=label_sample)
if sgd is None:
sgd = self.create_optimizer()
return sgd
def set_annotations(self, docs, batch_tag_ids):
"""Modify a batch of documents, using pre-computed scores.

View File

@ -81,7 +81,7 @@ class MultitaskObjective(Tagger):
def set_annotations(self, docs, dep_ids):
pass
def initialize(self, get_examples, pipeline=None, sgd=None):
def initialize(self, get_examples, pipeline=None):
if not hasattr(get_examples, "__call__"):
err = Errors.E930.format(name="MultitaskObjective", obj=type(get_examples))
raise ValueError(err)
@ -177,7 +177,7 @@ class ClozeMultitask(Pipe):
def set_annotations(self, docs, dep_ids):
pass
def initialize(self, get_examples, pipeline=None, sgd=None):
def initialize(self, get_examples, pipeline=None):
self.model.initialize() # TODO: fix initialization by defining X and Y
X = self.model.ops.alloc((5, self.model.get_ref("tok2vec").get_dim("nO")))
self.model.output_layer.initialize(X)

View File

@ -183,7 +183,7 @@ cdef class Pipe:
"""
return util.create_default_optimizer()
def initialize(self, get_examples, *, pipeline=None, sgd=None):
def initialize(self, get_examples, *, pipeline=None):
"""Initialize the pipe for training, using data examples if available.
This method needs to be implemented by each Pipe component,
ensuring the internal model (if available) is initialized properly
@ -194,8 +194,6 @@ cdef class Pipe:
pipeline (List[Tuple[str, Callable]]): Optional list of pipeline
components that this component is part of. Corresponds to
nlp.pipeline.
sgd (thinc.api.Optimizer): Optional optimizer. Will be created with
create_optimizer if it doesn't exist.
RETURNS (thinc.api.Optimizer): The optimizer.
DOCS: https://nightly.spacy.io/api/pipe#initialize

View File

@ -124,7 +124,7 @@ class SentenceRecognizer(Tagger):
raise ValueError("nan value when computing loss")
return float(loss), d_scores
def initialize(self, get_examples, *, pipeline=None, sgd=None):
def initialize(self, get_examples, *, pipeline=None):
"""Initialize the pipe for training, using a representative set
of data examples.
@ -133,9 +133,7 @@ class SentenceRecognizer(Tagger):
pipeline (List[Tuple[str, Callable]]): Optional list of pipeline
components that this component is part of. Corresponds to
nlp.pipeline.
sgd (thinc.api.Optimizer): Optional optimizer. Will be created with
create_optimizer if it doesn't exist.
RETURNS (thinc.api.Optimizer): The optimizer.
RETURNS: None
DOCS: https://nightly.spacy.io/api/sentencerecognizer#initialize
"""

View File

@ -256,7 +256,7 @@ class Tagger(Pipe):
raise ValueError("nan value when computing loss")
return float(loss), d_scores
def initialize(self, get_examples, *, pipeline=None, sgd=None):
def initialize(self, get_examples, *, pipeline=None):
"""Initialize the pipe for training, using a representative set
of data examples.
@ -265,8 +265,6 @@ class Tagger(Pipe):
pipeline (List[Tuple[str, Callable]]): Optional list of pipeline
components that this component is part of. Corresponds to
nlp.pipeline.
sgd (thinc.api.Optimizer): Optional optimizer. Will be created with
create_optimizer if it doesn't exist.
RETURNS (thinc.api.Optimizer): The optimizer.
DOCS: https://nightly.spacy.io/api/tagger#initialize
@ -289,9 +287,6 @@ class Tagger(Pipe):
assert len(doc_sample) > 0, Errors.E923.format(name=self.name)
assert len(label_sample) > 0, Errors.E923.format(name=self.name)
self.model.initialize(X=doc_sample, Y=label_sample)
if sgd is None:
sgd = self.create_optimizer()
return sgd
def add_label(self, label):
"""Add a new label to the pipe.

View File

@ -338,8 +338,7 @@ class TextCategorizer(Pipe):
self,
get_examples: Callable[[], Iterable[Example]],
*,
pipeline: Optional[List[Tuple[str, Callable[[Doc], Doc]]]] = None,
sgd: Optional[Optimizer] = None,
pipeline: Optional[List[Tuple[str, Callable[[Doc], Doc]]]] = None
) -> Optimizer:
"""Initialize the pipe for training, using a representative set
of data examples.

View File

@ -354,7 +354,7 @@ cdef class Parser(Pipe):
# If all weights for an output are 0 in the original model, don't
# supervise that output. This allows us to add classes.
loss += (d_scores**2).sum()
backprop(d_scores, sgd=sgd)
backprop(d_scores)
# Follow the predicted action
self.transition_states(states, guesses)
states = [state for state in states if not state.is_final()]
@ -405,9 +405,8 @@ cdef class Parser(Pipe):
def set_output(self, nO):
self.model.attrs["resize_output"](self.model, nO)
def initialize(self, get_examples, pipeline=None, sgd=None, **kwargs):
def initialize(self, get_examples, pipeline=None, settings=None):
self._ensure_examples(get_examples)
self.cfg.update(kwargs)
lexeme_norms = self.vocab.lookups.get_table("lexeme_norm", {})
if len(lexeme_norms) == 0 and self.vocab.lang in util.LEXEME_NORM_LANGS:
langs = ", ".join(util.LEXEME_NORM_LANGS)
@ -425,8 +424,6 @@ cdef class Parser(Pipe):
self.moves.initialize_actions(actions)
# make sure we resize so we have an appropriate upper layer
self._resize()
if sgd is None:
sgd = self.create_optimizer()
doc_sample = []
if pipeline is not None:
for name, component in pipeline:
@ -442,7 +439,7 @@ cdef class Parser(Pipe):
assert len(doc_sample) > 0, Errors.E923.format(name=self.name)
self.model.initialize(doc_sample)
if pipeline is not None:
self.init_multitask_objectives(get_examples, pipeline, sgd=sgd, **self.cfg)
self.init_multitask_objectives(get_examples, pipeline)
return sgd
def to_disk(self, path, exclude=tuple()):