Return empty batch from tok2vec listener if no doc.tensor

This commit is contained in:
Matthew Honnibal 2020-08-29 03:46:50 +02:00
parent 89f692bc8a
commit 58f19421b1


@@ -295,4 +295,19 @@ def forward(model: Tok2VecListener, inputs, is_train: bool):
         model.verify_inputs(inputs)
         return model._outputs, model._backprop
     else:
-        return [doc.tensor for doc in inputs], lambda dX: []
+        # This is pretty grim, but it's hard to do better :(.
+        # It's hard to avoid relying on the doc.tensor attribute, because the
+        # pipeline components can batch the data differently during prediction.
+        # That doesn't happen in update, where the nlp object works on batches
+        # of data.
+        # When the components batch differently, we don't receive a matching
+        # prediction from the upstream, so we can't predict.
+        if not all(doc.tensor.size for doc in inputs):
+            # But we do need to do *something* if the tensor hasn't been set.
+            # The compromise is to at least return data of the right shape,
+            # so the output is valid.
+            width = model.get_dim("nO")
+            outputs = [model.ops.alloc2f(len(doc), width) for doc in inputs]
+        else:
+            outputs = [doc.tensor for doc in inputs]
+        return outputs, lambda dX: []
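
To make the fallback concrete, here is a minimal, self-contained numpy sketch of the same predict-time logic. It is not the spaCy implementation: FakeDoc and listener_predict are hypothetical stand-ins for Doc and the listener's forward pass, and the width argument stands in for model.get_dim("nO").

# Hedged sketch: hypothetical stand-ins, not spaCy's Tok2VecListener.
from dataclasses import dataclass, field
from typing import List

import numpy


@dataclass
class FakeDoc:
    # Hypothetical stand-in for spacy.tokens.Doc: just tokens plus a tensor
    # that an upstream Tok2Vec component may or may not have filled in.
    words: List[str]
    tensor: numpy.ndarray = field(
        default_factory=lambda: numpy.zeros((0, 0), dtype="f")
    )

    def __len__(self) -> int:
        return len(self.words)


def listener_predict(docs: List[FakeDoc], width: int) -> List[numpy.ndarray]:
    # Mirrors the diff above: if any doc is missing its tensor (size 0),
    # return zero-valued arrays of shape (n_tokens, width) for the whole
    # batch so downstream layers still receive validly shaped input.
    if not all(doc.tensor.size for doc in docs):
        return [numpy.zeros((len(doc), width), dtype="f") for doc in docs]
    return [doc.tensor for doc in docs]


docs = [
    FakeDoc(["hello", "world"], tensor=numpy.ones((2, 4), dtype="f")),
    FakeDoc(["no", "tensor", "here"]),  # tensor never set upstream
]
outputs = listener_predict(docs, width=4)
print([o.shape for o in outputs])  # [(2, 4), (3, 4)]

Note that, as in the diff's all(...) check, a single doc with an unset tensor causes the whole batch to fall back to zero-valued outputs; the point is shape validity, not a partial prediction.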