Start work on max violation update. About to clean up commented out code.

This commit is contained in:
Matthew Honnibal 2016-07-24 11:01:54 +02:00
parent 8b4abc24e3
commit 476977ef62

View File

@@ -105,28 +105,36 @@ cdef class BeamParser(Parser):
# parse. # parse.
self._advance_beam(pred, gold_parse, False) self._advance_beam(pred, gold_parse, False)
self._advance_beam(gold, gold_parse, True) self._advance_beam(gold, gold_parse, True)
# Early update if MAX_VIOLN_UPDATE:
if pred.min_score > gold.score: violn.check_crf(pred, gold)
elif pred.min_score > gold.score: # Early update
break break
# Gather the partition function --- Z --- by which we can normalize the
# scores into a probability distribution. The simple idea here is that
# we clip the probability of all parses outside the beam to 0.
cdef long double Z = 0.0 cdef long double Z = 0.0
for i in range(pred.size): if MAX_VIOLN_UPDATE:
# Make sure we've only got negative examples here. if violn.delta != -1:
# Otherwise, we might double-count the gold. for prob, hist in zip(violn.p_scores, violn.p_hist):
if pred._states[i].loss > 0: self._update_dense(tokens, hist, prob / violn.Z)
Z += exp(pred._states[i].score) for prob, hist in zip(violn.g_scores, violn.g_hist):
if Z > 0: # If no negative examples, don't update. self._update_dense(tokens, hist, -prob / violn.gZ)
Z += exp(gold.score) else:
for i, hist in enumerate(pred.histories): # Gather the partition function --- Z --- by which we can normalize the
if pred._states[i].loss > 0: # scores into a probability distribution. The simple idea here is that
# Update with the negative example. # we clip the probability of all parses outside the beam to 0.
# Gradient of loss is P(parse) - 0 for i in range(pred.size):
self._update_dense(tokens, hist, exp(pred._states[i].score) / Z) # Make sure we've only got negative examples here.
# Update with the positive example. # Otherwise, we might double-count the gold.
# Gradient of loss is P(parse) - 1 if pred._states[i].loss > 0:
self._update_dense(tokens, gold.histories[0], (exp(gold.score) / Z) - 1) Z += exp(pred._states[i].score)
if Z > 0: # If no negative examples, don't update.
Z += exp(gold.score)
for i, hist in enumerate(pred.histories):
if pred._states[i].loss > 0:
# Update with the negative example.
# Gradient of loss is P(parse) - 0
self._update_dense(tokens, hist, exp(pred._states[i].score) / Z)
# Update with the positive example.
# Gradient of loss is P(parse) - 1
self._update_dense(tokens, gold.histories[0], (exp(gold.score) / Z) - 1)
_cleanup(pred) _cleanup(pred)
_cleanup(gold) _cleanup(gold)
return pred.loss return pred.loss