Fix shape inference in begin_training

2025-07-11 08:42:28 +03:00 · 2020-05-21 19:26:29 +02:00 · 2020-05-21 19:26:29 +02:00 · f075655deb
commit f075655deb
parent 1729165e90
5 changed files with 14 additions and 7 deletions
--- a/spacy/ml/models/parser.py
+++ b/spacy/ml/models/parser.py
@@ -15,10 +15,9 @@ def build_tb_parser_model(
    use_upper=True,
    nO=None,
 ):
-    token_vector_width = tok2vec.get_dim("nO")
    tok2vec = chain(
        tok2vec,
-        with_array(Linear(hidden_width, token_vector_width)),
+        with_array(Linear(hidden_width)),
        list2array(),
    )
    tok2vec.set_dim("nO", hidden_width)
--- a/spacy/ml/models/tagger.py
+++ b/spacy/ml/models/tagger.py
@@ -6,9 +6,8 @@ from ...util import registry
@registry.architectures.register("spacy.Tagger.v1")
 def build_tagger_model(tok2vec, nO=None) -> Model:
-    token_vector_width = tok2vec.get_dim("nO")
    # TODO: glorot_uniform_init seems to work a bit better than zero_init here?!
-    output_layer = Softmax(nO, nI=token_vector_width, init_W=zero_init)
+    output_layer = Softmax(nO, init_W=zero_init)
    softmax = with_array(output_layer)
    model = chain(tok2vec, softmax)
    model.set_ref("tok2vec", tok2vec)
--- a/spacy/ml/tb_framework.py
+++ b/spacy/ml/tb_framework.py
@@ -38,8 +38,8 @@ def forward(model, X, is_train):
 def init(model, X=None, Y=None):
-    tok2vec = model.get_ref("tok2vec").initialize()
+    tok2vec = model.get_ref("tok2vec").initialize(X=X)
-    lower = model.get_ref("lower").initialize(X=X)
+    lower = model.get_ref("lower").initialize()
    if model.attrs["has_upper"]:
        statevecs = model.ops.alloc2f(2, lower.get_dim("nO"))
        model.get_ref("upper").initialize(X=statevecs)
--- a/spacy/pipeline/pipes.pyx
+++ b/spacy/pipeline/pipes.pyx
@@ -531,7 +531,12 @@ class Tagger(Pipe):
                                          vocab.morphology.lemmatizer,
                                          exc=vocab.morphology.exc)
        self.set_output(len(self.labels))
-        self.model.initialize()
+        doc_sample = [Doc(self.vocab, words=["hello", "world"])]
+        for name, component in pipeline:
+            if component is self:
+                break
+            doc_sample = list(component.pipe(doc_sample))
+        self.model.initialize(X=doc_sample)
        # Get batch of example docs, example outputs to call begin_training().
        # This lets the model infer shapes.
        link_vectors_to_models(self.vocab)
--- a/spacy/syntax/nn_parser.pyx
+++ b/spacy/syntax/nn_parser.pyx
@@ -629,6 +629,10 @@ cdef class Parser:
            for doc, gold in parses:
                doc_sample.append(doc)
                gold_sample.append(gold)
+        for name, component in pipeline:
+            if component is self:
+                break
+            doc_sample = list(component.pipe(doc_sample))
        self.model.initialize(doc_sample, gold_sample)
        if pipeline is not None:
            self.init_multitask_objectives(get_examples, pipeline, sgd=sgd, **self.cfg)