mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-10 19:57:17 +03:00
Return a placeholder batch of correctly shaped arrays from the tok2vec listener if doc.tensor is not set
This commit is contained in:
parent
89f692bc8a
commit
58f19421b1
|
@ -295,4 +295,19 @@ def forward(model: Tok2VecListener, inputs, is_train: bool):
|
||||||
model.verify_inputs(inputs)
|
model.verify_inputs(inputs)
|
||||||
return model._outputs, model._backprop
|
return model._outputs, model._backprop
|
||||||
else:
|
else:
|
||||||
return [doc.tensor for doc in inputs], lambda dX: []
|
# This is pretty grim, but it's hard to do better :(.
|
||||||
|
# It's hard to avoid relying on the doc.tensor attribute, because the
|
||||||
|
# pipeline components can batch the data differently during prediction.
|
||||||
|
# That doesn't happen in update, where the nlp object works on batches
|
||||||
|
# of data.
|
||||||
|
# When the components batch differently, we don't receive a matching
|
||||||
|
# prediction from the upstream, so we can't predict.
|
||||||
|
if not all(doc.tensor.size for doc in inputs):
|
||||||
|
# But we do need to do *something* if the tensor hasn't been set.
|
||||||
|
# The compromise is to at least return data of the right shape,
|
||||||
|
# so the output is valid.
|
||||||
|
width = model.get_dim("nO")
|
||||||
|
outputs = [model.ops.alloc2f(len(doc), width) for doc in inputs]
|
||||||
|
else:
|
||||||
|
outputs = [doc.tensor for doc in inputs]
|
||||||
|
return outputs, lambda dX: []
|
||||||
|
|
Loading…
Reference in New Issue
Block a user