Mirror of https://github.com/explosion/spaCy.git, synced 2025-08-04 12:20:20 +03:00
Remove unused _forward_reference function

This commit is contained in:
  parent: 9fccaab333
  commit: 0fc07343e4
@@ -317,106 +317,6 @@ def _forward_fallback(model: Model, moves: TransitionSystem, states: List[StateC
     return (list(batch), all_scores), backprop_parser
 
 
-def _forward_reference(
-    model, docs_moves: Tuple[List[Doc], TransitionSystem], is_train: bool
-):
-    """Slow reference implementation, without the precomputation"""
-    nF = model.get_dim("nF")
-    tok2vec = model.get_ref("tok2vec")
-    output = model.get_ref("output")
-    hidden_pad = model.get_param("hidden_pad")
-    hidden_W = model.get_param("hidden_W")
-    hidden_b = model.get_param("hidden_b")
-    nH = model.get_dim("nH")
-    nP = model.get_dim("nP")
-    nO = model.get_dim("nO")
-    nI = model.get_dim("nI")
-
-    ops = model.ops
-    docs, moves = docs_moves
-    states = moves.init_batch(docs)
-    tokvecs, backprop_tok2vec = tok2vec(docs, is_train)
-    tokvecs = model.ops.xp.vstack((tokvecs, hidden_pad))
-    all_ids = []
-    all_which = []
-    all_statevecs = []
-    all_scores = []
-    all_tokfeats = []
-    next_states = [s for s in states if not s.is_final()]
-    seen_mask = _get_seen_mask(model)
-    ids = numpy.zeros((len(states), nF), dtype="i")
-    while next_states:
-        ids = ids[: len(next_states)]
-        for i, state in enumerate(next_states):
-            state.set_context_tokens(ids, i, nF)
-        # Sum the state features, add the bias and apply the activation (maxout)
-        # to create the state vectors.
-        tokfeats3f = tokvecs[ids]
-        tokfeats = model.ops.reshape2f(tokfeats3f, tokfeats3f.shape[0], -1)
-        preacts2f = model.ops.gemm(tokfeats, hidden_W, trans2=True)
-        preacts2f += hidden_b
-        preacts = model.ops.reshape3f(preacts2f, preacts2f.shape[0], nH, nP)
-        statevecs, which = ops.maxout(preacts)
-        # We don't use output's backprop, since we want to backprop for
-        # all states at once, rather than a single state.
-        scores = output.predict(statevecs)
-        scores[:, seen_mask] = model.ops.xp.nanmin(scores)
-        # Transition the states, filtering out any that are finished.
-        next_states = moves.transition_states(next_states, scores)
-        all_scores.append(scores)
-        if is_train:
-            # Remember intermediate results for the backprop.
-            all_tokfeats.append(tokfeats)
-            all_ids.append(ids.copy())
-            all_statevecs.append(statevecs)
-            all_which.append(which)
-
-    nS = sum(len(s.history) for s in states)
-
-    def backprop_parser(d_states_d_scores):
-        d_tokvecs = model.ops.alloc2f(tokvecs.shape[0], tokvecs.shape[1])
-        ids = model.ops.xp.vstack(all_ids)
-        which = ops.xp.vstack(all_which)
-        statevecs = model.ops.xp.vstack(all_statevecs)
-        tokfeats = model.ops.xp.vstack(all_tokfeats)
-        _, d_scores = d_states_d_scores
-        if model.attrs.get("unseen_classes"):
-            # If we have a negative gradient (i.e. the probability should
-            # increase) on any classes we filtered out as unseen, mark
-            # them as seen.
-            for clas in set(model.attrs["unseen_classes"]):
-                if (d_scores[:, clas] < 0).any():
-                    model.attrs["unseen_classes"].remove(clas)
-        d_scores *= seen_mask == False
-        assert statevecs.shape == (nS, nH), statevecs.shape
-        assert d_scores.shape == (nS, nO), d_scores.shape
-        # Calculate the gradients for the parameters of the output layer.
-        # The weight gemm is (nS, nO) @ (nS, nH).T
-        output.inc_grad("b", d_scores.sum(axis=0))
-        output.inc_grad("W", model.ops.gemm(d_scores, statevecs, trans1=True))
-        # Now calculate d_statevecs, by backproping through the output linear layer.
-        # This gemm is (nS, nO) @ (nO, nH)
-        output_W = output.get_param("W")
-        d_statevecs = model.ops.gemm(d_scores, output_W)
-        # Backprop through the maxout activation
-        d_preacts = model.ops.backprop_maxout(d_statevecs, which, nP)
-        d_preacts2f = model.ops.reshape2f(d_preacts, d_preacts.shape[0], nH * nP)
-        # Now increment the gradients for the hidden layer.
-        # The gemm here is (nS, nH*nP) @ (nS, nF*nI)
-        model.inc_grad("hidden_b", d_preacts2f.sum(axis=0))
-        model.inc_grad("hidden_W", model.ops.gemm(d_preacts2f, tokfeats, trans1=True))
-        # Caclulate d_tokfeats
-        # The gemm here is (nS, nH*nP) @ (nH*nP, nF*nI)
-        d_tokfeats = model.ops.gemm(d_preacts2f, hidden_W)
-        # Get the gradients of the tokvecs and the padding
-        d_tokfeats3f = model.ops.reshape3f(d_tokfeats, nS, nF, nI)
-        model.ops.scatter_add(d_tokvecs, ids, d_tokfeats3f)
-        model.inc_grad("hidden_pad", d_tokvecs[-1])
-        return (backprop_tok2vec(d_tokvecs[:-1]), None)
-
-    return (states, all_scores), backprop_parser
-
-
 def _get_seen_mask(model: Model) -> numpy.array[bool, 1]:
     mask = model.ops.xp.zeros(model.get_dim("nO"), dtype="bool")
     for class_ in model.attrs.get("unseen_classes", set()):
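For orientation, the core of the removed reference implementation is an affine transform over the flattened context-token features followed by a maxout activation, with backprop_maxout routing gradients back only through the winning pieces. Below is a minimal, self-contained sketch of just that step, assuming thinc's NumpyOps; the dimensions and random inputs are illustrative stand-ins, not values from the commit.

import numpy
from thinc.api import NumpyOps

ops = NumpyOps()
rng = numpy.random.default_rng(0)
nS, nF, nI = 4, 6, 8   # hypothetical: states, context features per state, token vector width
nH, nP = 5, 3          # hypothetical: hidden width, maxout pieces

# Random stand-ins for the flattened context-token features and hidden-layer parameters.
tokfeats = rng.standard_normal((nS, nF * nI)).astype("float32")
hidden_W = rng.standard_normal((nH * nP, nF * nI)).astype("float32")
hidden_b = rng.standard_normal(nH * nP).astype("float32")

# Forward: affine transform, then maxout over the nP pieces of each hidden unit.
preacts2f = ops.gemm(tokfeats, hidden_W, trans2=True) + hidden_b
preacts = ops.reshape3f(preacts2f, nS, nH, nP)
statevecs, which = ops.maxout(preacts)        # (nS, nH) values, winning piece per unit

# Backward: gradients flow only through the winning piece of each unit.
d_statevecs = rng.standard_normal((nS, nH)).astype("float32")
d_preacts = ops.backprop_maxout(d_statevecs, which, nP)
d_preacts2f = ops.reshape2f(d_preacts, nS, nH * nP)
d_tokfeats = ops.gemm(d_preacts2f, hidden_W)  # gradient w.r.t. the input features
print(statevecs.shape, d_tokfeats.shape)      # (4, 5) (4, 48)

In the removed code this same step runs once per transition over the unfinished states, with an extra padding row appended to tokvecs and scatter_add accumulating the per-state feature gradients back into the token vectors and the padding parameter.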