Mirror of https://github.com/explosion/spaCy.git
Small `update_with_oracle_cut_size` fixes
Fix an off-by-one in `TransitionModel.forward`, where we always did one move more than the maximum number of moves. This exposed another issue: when creating cut states, we skipped states where the (maximum number of) moves from that state only applied transitions that did not modify the buffer. Replace uses of `random.uniform` with `random.randrange`.
parent e27c60a702
commit 10f5e9413d
@@ -338,9 +338,9 @@ def _forward_fallback(
         all_ids.append(ids)
         all_statevecs.append(statevecs)
         all_which.append(which)
+        n_moves += 1
         if n_moves >= max_moves >= 1:
             break
-        n_moves += 1
 
     def backprop_parser(d_states_d_scores):
         ids = ops.xp.vstack(all_ids)
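For intuition, here is a minimal self-contained sketch of the off-by-one fixed in the hunk above; the `apply_capped_moves_*` helpers and the toy `moves` input are hypothetical stand-ins, not spaCy's actual code.

```python
# Minimal sketch of the off-by-one: checking the cap before counting the move
# that was just applied lets one extra move through.
def apply_capped_moves_buggy(moves, max_moves):
    n_moves = 0
    applied = []
    for move in moves:
        applied.append(move)           # the move has already been applied here
        if n_moves >= max_moves >= 1:  # cap is checked before counting this move...
            break
        n_moves += 1                   # ...so the counter lags one behind
    return applied

def apply_capped_moves_fixed(moves, max_moves):
    n_moves = 0
    applied = []
    for move in moves:
        applied.append(move)
        n_moves += 1                   # count the move that was just applied
        if n_moves >= max_moves >= 1:  # then check the cap
            break
    return applied

print(len(apply_capped_moves_buggy("abcdef", max_moves=2)))  # 3 -> one move too many
print(len(apply_capped_moves_fixed("abcdef", max_moves=2)))  # 2
```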
@@ -258,7 +258,7 @@ class Parser(TrainablePipe):
             # batch uniform length. Since we do not have a gold standard
             # sequence, we use the teacher's predictions as the gold
             # standard.
-            max_moves = int(random.uniform(max(max_moves // 2, 1), max_moves * 2))
+            max_moves = random.randrange(max(max_moves // 2, 1), max_moves * 2)
             states = self._init_batch_from_teacher(teacher_pipe, student_docs, max_moves)
         else:
             states = self.moves.init_batch(student_docs)
@@ -425,7 +425,7 @@ class Parser(TrainablePipe):
         if max_moves >= 1:
             # Chop sequences into lengths of this many words, to make the
             # batch uniform length.
-            max_moves = int(random.uniform(max(max_moves // 2, 1), max_moves * 2))
+            max_moves = random.randrange(max(max_moves // 2, 1), max_moves * 2)
             init_states, gold_states, _ = self._init_gold_batch(
                 examples,
                 max_length=max_moves
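On the `random.uniform` to `random.randrange` replacement in the two hunks above, a quick hedged comparison; the bounds below are illustrative stand-ins, not values from a real run.

```python
import random

lo, hi = 5, 20  # stand-ins for max(max_moves // 2, 1) and max_moves * 2

# Old style: draw a float in [lo, hi] and truncate. The draw goes through float
# arithmetic, and per the random.uniform docs the upper end point may or may not
# be included depending on floating-point rounding.
old_cut = int(random.uniform(lo, hi))

# New style: draw an integer uniformly from [lo, hi) directly.
new_cut = random.randrange(lo, hi)

assert lo <= new_cut < hi
```

`random.randrange(lo, hi)` raises `ValueError` on an empty range, which cannot happen here: for `max_moves >= 1`, `max(max_moves // 2, 1)` is always smaller than `max_moves * 2`.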
@@ -729,9 +729,8 @@ class Parser(TrainablePipe):
                     action.do(state.c, action.label)
                     if state.is_final():
                         break
-                if moves.has_gold(eg, start_state.B(0), state.B(0)):
-                    states.append(start_state)
-                    golds.append(gold)
+                states.append(start_state)
+                golds.append(gold)
                 if state.is_final():
                     break
         return states, golds, max_length
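The last hunk is the "cut states" fix from the commit message: when none of the moves applied from a cut's start state modified the buffer, `start_state.B(0)` equals `state.B(0)`, which is presumably why the removed `has_gold` guard rejected exactly those cuts. Below is a rough plain-Python sketch of the chopping pattern only; the helper is hypothetical, not spaCy's `_init_gold_batch`.

```python
# Hypothetical illustration of chopping an oracle action sequence into cuts of
# at most `max_length` moves. The real method also copies the parser state at
# the start of each cut and, after this fix, keeps every cut.
def chop_oracle_actions(oracle_actions, max_length):
    return [
        oracle_actions[i:i + max_length]
        for i in range(0, len(oracle_actions), max_length)
    ]

print(chop_oracle_actions(list("abcdefg"), max_length=3))
# [['a', 'b', 'c'], ['d', 'e', 'f'], ['g']]
```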