* Add non-monotonic parsing with cost-sensitive update. Scores 92.26 on the Y&M set.

This commit is contained in:
Matthew Honnibal 2014-12-18 11:33:25 +11:00
parent 7e0c692daf
commit 6ab7e40590
6 changed files with 118 additions and 87 deletions

View File

@ -45,7 +45,7 @@ cdef inline void fill_token(atom_t* context, const TokenC* token) nogil:
# the source that are set to 1. # the source that are set to 1.
context[4] = token.lex.cluster & 63 context[4] = token.lex.cluster & 63
context[5] = token.lex.cluster & 15 context[5] = token.lex.cluster & 15
context[6] = token.dep_tag context[6] = token.dep_tag if has_head(token) else 0
cdef int fill_context(atom_t* context, State* state) except -1: cdef int fill_context(atom_t* context, State* state) except -1:

View File

@ -70,10 +70,10 @@ cdef inline bint is_final(const State *s) nogil:
return at_eol(s) # The stack will be attached to root anyway return at_eol(s) # The stack will be attached to root anyway
cdef int children_in_buffer(const State *s, const int head, int* gold) except -1 cdef int children_in_buffer(const State *s, const int head, const int* gold) except -1
cdef int head_in_buffer(const State *s, const int child, int* gold) except -1 cdef int head_in_buffer(const State *s, const int child, const int* gold) except -1
cdef int children_in_stack(const State *s, const int head, int* gold) except -1 cdef int children_in_stack(const State *s, const int head, const int* gold) except -1
cdef int head_in_stack(const State *s, const int child, int* gold) except -1 cdef int head_in_stack(const State *s, const int child, const int* gold) except -1
cdef State* init_state(Pool mem, TokenC* sent, const int sent_length) except NULL cdef State* init_state(Pool mem, TokenC* sent, const int sent_length) except NULL

View File

@ -6,6 +6,10 @@ from ..lexeme cimport EMPTY_LEXEME
from ..tokens cimport TokenC from ..tokens cimport TokenC
DEF PADDING = 5
DEF NON_MONOTONIC = True
cdef int add_dep(State *s, int head, int child, int label) except -1: cdef int add_dep(State *s, int head, int child, int label) except -1:
cdef int dist = head - child cdef int dist = head - child
s.sent[child].head = dist s.sent[child].head = dist
@ -32,9 +36,14 @@ cdef int push_stack(State *s) except -1:
s.stack[0] = s.i s.stack[0] = s.i
s.stack_len += 1 s.stack_len += 1
s.i += 1 s.i += 1
if at_eol(s):
while s.stack_len != 0:
if not has_head(get_s0(s)):
get_s0(s).dep_tag = 0
pop_stack(s)
cdef int children_in_buffer(const State *s, int head, int* gold) except -1: cdef int children_in_buffer(const State *s, int head, const int* gold) except -1:
# Golds holds an array of head offsets --- the head of word i is i - golds[i] # Golds holds an array of head offsets --- the head of word i is i - golds[i]
# Iterate over the tokens of the queue, and check whether their gold head is # Iterate over the tokens of the queue, and check whether their gold head is
# our target # our target
@ -46,20 +55,21 @@ cdef int children_in_buffer(const State *s, int head, int* gold) except -1:
return n return n
cdef int head_in_buffer(const State *s, const int child, int* gold) except -1: cdef int head_in_buffer(const State *s, const int child, const int* gold) except -1:
return gold[child] >= s.i return gold[child] >= s.i
cdef int children_in_stack(const State *s, const int head, int* gold) except -1: cdef int children_in_stack(const State *s, const int head, const int* gold) except -1:
cdef int i cdef int i
cdef int n = 0 cdef int n = 0
for i in range(s.stack_len): for i in range(s.stack_len):
if gold[s.stack[-i]] == head: if gold[s.stack[-i]] == head:
n += 1 if NON_MONOTONIC or not has_head(get_s0(s)):
n += 1
return n return n
cdef int head_in_stack(const State *s, const int child, int* gold) except -1: cdef int head_in_stack(const State *s, const int child, const int* gold) except -1:
cdef int i cdef int i
for i in range(s.stack_len): for i in range(s.stack_len):
if gold[child] == s.stack[-i]: if gold[child] == s.stack[-i]:
@ -104,9 +114,6 @@ cdef int count_right_kids(const TokenC* head) nogil:
DEF PADDING = 5
cdef State* init_state(Pool mem, TokenC* sent, const int sent_length) except NULL: cdef State* init_state(Pool mem, TokenC* sent, const int sent_length) except NULL:
cdef int padded_len = sent_length + PADDING + PADDING cdef int padded_len = sent_length + PADDING + PADDING
cdef State* s = <State*>mem.alloc(1, sizeof(State)) cdef State* s = <State*>mem.alloc(1, sizeof(State))

View File

@ -7,8 +7,11 @@ from ._state cimport State
cdef struct Transition: cdef struct Transition:
int clas
int move int move
int label int label
int cost
weight_t score
cdef class TransitionSystem: cdef class TransitionSystem:
@ -18,7 +21,8 @@ cdef class TransitionSystem:
cdef const Transition* _moves cdef const Transition* _moves
cdef int best_valid(self, const weight_t* scores, const State* s) except -1 cdef Transition best_valid(self, const weight_t* scores, const State* s) except *
cdef int best_gold(self, const weight_t* scores, const State* s, cdef Transition best_gold(self, Transition* guess, const weight_t* scores,
int* gold_heads, int* gold_labels) except -1 const State* s,
cdef int transition(self, State *s, const int clas) except -1 const int* gold_heads, const int* gold_labels) except *
cdef int transition(self, State *s, const Transition* t) except -1

View File

@ -7,6 +7,8 @@ from ._state cimport head_in_stack, children_in_stack
from ..tokens cimport TokenC from ..tokens cimport TokenC
DEF NON_MONOTONIC = True
cdef enum: cdef enum:
SHIFT SHIFT
@ -25,22 +27,30 @@ cdef inline bint _can_right(const State* s) nogil:
cdef inline bint _can_left(const State* s) nogil: cdef inline bint _can_left(const State* s) nogil:
return s.stack_len >= 1 and not has_head(get_s0(s)) if NON_MONOTONIC:
return s.stack_len >= 1
else:
return s.stack_len >= 1 and not has_head(get_s0(s))
cdef inline bint _can_reduce(const State* s) nogil: cdef inline bint _can_reduce(const State* s) nogil:
return s.stack_len >= 2 and has_head(get_s0(s)) if NON_MONOTONIC:
return s.stack_len >= 2
else:
return s.stack_len >= 2 and has_head(get_s0(s))
cdef int _shift_cost(const State* s, int* gold) except -1: cdef int _shift_cost(const State* s, const int* gold) except -1:
assert not at_eol(s) assert not at_eol(s)
cost = 0 cost = 0
cost += head_in_stack(s, s.i, gold) cost += head_in_stack(s, s.i, gold)
cost += children_in_stack(s, s.i, gold) cost += children_in_stack(s, s.i, gold)
if NON_MONOTONIC:
cost += gold[s.stack[0]] == s.i
return cost return cost
cdef int _right_cost(const State* s, int* gold) except -1: cdef int _right_cost(const State* s, const int* gold) except -1:
assert s.stack_len >= 1 assert s.stack_len >= 1
cost = 0 cost = 0
if gold[s.i] == s.stack[0]: if gold[s.i] == s.stack[0]:
@ -48,10 +58,12 @@ cdef int _right_cost(const State* s, int* gold) except -1:
cost += head_in_buffer(s, s.i, gold) cost += head_in_buffer(s, s.i, gold)
cost += children_in_stack(s, s.i, gold) cost += children_in_stack(s, s.i, gold)
cost += head_in_stack(s, s.i, gold) cost += head_in_stack(s, s.i, gold)
if NON_MONOTONIC:
cost += gold[s.stack[0]] == s.i
return cost return cost
cdef int _left_cost(const State* s, int* gold) except -1: cdef int _left_cost(const State* s, const int* gold) except -1:
assert s.stack_len >= 1 assert s.stack_len >= 1
cost = 0 cost = 0
if gold[s.stack[0]] == s.i: if gold[s.stack[0]] == s.i:
@ -59,11 +71,17 @@ cdef int _left_cost(const State* s, int* gold) except -1:
cost += head_in_buffer(s, s.stack[0], gold) cost += head_in_buffer(s, s.stack[0], gold)
cost += children_in_buffer(s, s.stack[0], gold) cost += children_in_buffer(s, s.stack[0], gold)
if NON_MONOTONIC and s.stack_len >= 2:
cost += gold[s.stack[0]] == s.stack[-1]
return cost return cost
cdef int _reduce_cost(const State* s, int* gold) except -1: cdef int _reduce_cost(const State* s, const int* gold) except -1:
return children_in_buffer(s, s.stack[0], gold) cdef int cost = 0
cost += children_in_buffer(s, s.stack[0], gold)
if NON_MONOTONIC:
cost += head_in_buffer(s, s.stack[0], gold)
return cost
cdef class TransitionSystem: cdef class TransitionSystem:
@ -80,9 +98,11 @@ cdef class TransitionSystem:
cdef int i = 0 cdef int i = 0
moves[i].move = SHIFT moves[i].move = SHIFT
moves[i].label = 0 moves[i].label = 0
moves[i].clas = i
i += 1 i += 1
moves[i].move = REDUCE moves[i].move = REDUCE
moves[i].label = 0 moves[i].label = 0
moves[i].clas = i
i += 1 i += 1
self.label_ids = {'ROOT': 0} self.label_ids = {'ROOT': 0}
cdef int label_id cdef int label_id
@ -90,17 +110,21 @@ cdef class TransitionSystem:
label_id = self.label_ids.setdefault(label_str, len(self.label_ids)) label_id = self.label_ids.setdefault(label_str, len(self.label_ids))
moves[i].move = LEFT moves[i].move = LEFT
moves[i].label = label_id moves[i].label = label_id
moves[i].clas = i
i += 1 i += 1
for label_str in right_labels: for label_str in right_labels:
label_id = self.label_ids.setdefault(label_str, len(self.label_ids)) label_id = self.label_ids.setdefault(label_str, len(self.label_ids))
moves[i].move = RIGHT moves[i].move = RIGHT
moves[i].label = label_id moves[i].label = label_id
moves[i].clas = i
i += 1 i += 1
self._moves = moves self._moves = moves
cdef int transition(self, State *s, const int clas) except -1: cdef int transition(self, State *s, const Transition* t) except -1:
cdef const Transition* t = &self._moves[clas]
if t.move == SHIFT: if t.move == SHIFT:
# Set the dep label, in case we need it after we reduce
if NON_MONOTONIC:
get_s0(s).dep_tag = t.label
push_stack(s) push_stack(s)
elif t.move == LEFT: elif t.move == LEFT:
add_dep(s, s.i, s.stack[0], t.label) add_dep(s, s.i, s.stack[0], t.label)
@ -109,11 +133,12 @@ cdef class TransitionSystem:
add_dep(s, s.stack[0], s.i, t.label) add_dep(s, s.stack[0], s.i, t.label)
push_stack(s) push_stack(s)
elif t.move == REDUCE: elif t.move == REDUCE:
add_dep(s, s.stack[-1], s.stack[0], get_s0(s).dep_tag)
pop_stack(s) pop_stack(s)
else: else:
raise StandardError(t.move) raise StandardError(t.move)
cdef int best_valid(self, const weight_t* scores, const State* s) except -1: cdef Transition best_valid(self, const weight_t* scores, const State* s) except *:
cdef bint[N_MOVES] valid cdef bint[N_MOVES] valid
valid[SHIFT] = _can_shift(s) valid[SHIFT] = _can_shift(s)
valid[LEFT] = _can_left(s) valid[LEFT] = _can_left(s)
@ -122,69 +147,61 @@ cdef class TransitionSystem:
cdef int best = -1 cdef int best = -1
cdef weight_t score = 0 cdef weight_t score = 0
cdef weight_t best_r_score = -9000
cdef int best_r_label = -1
cdef int i cdef int i
for i in range(self.n_moves): for i in range(self.n_moves):
if valid[self._moves[i].move] and (best == -1 or scores[i] > score): if valid[self._moves[i].move] and (best == -1 or scores[i] > score):
best = i best = i
score = scores[i] score = scores[i]
if self._moves[i].move == RIGHT and scores[i] > best_r_score:
best_r_label = self._moves[i].label
assert best >= 0 assert best >= 0
return best cdef Transition t = self._moves[best]
t.score = score
if t.move == SHIFT:
t.label = best_r_label
return t
cdef int best_gold(self, const weight_t* scores, const State* s, cdef Transition best_gold(self, Transition* guess, const weight_t* scores,
int* gold_heads, int* gold_labels) except -1: const State* s,
const int* gold_heads, const int* gold_labels) except *:
# If we can create a gold dependency, only one action can be correct
cdef int[N_MOVES] unl_costs cdef int[N_MOVES] unl_costs
unl_costs[SHIFT] = _shift_cost(s, gold_heads) if _can_shift(s) else -1 unl_costs[SHIFT] = _shift_cost(s, gold_heads) if _can_shift(s) else -1
unl_costs[LEFT] = _left_cost(s, gold_heads) if _can_left(s) else -1 unl_costs[LEFT] = _left_cost(s, gold_heads) if _can_left(s) else -1
unl_costs[RIGHT] = _right_cost(s, gold_heads) if _can_right(s) else -1 unl_costs[RIGHT] = _right_cost(s, gold_heads) if _can_right(s) else -1
unl_costs[REDUCE] = _reduce_cost(s, gold_heads) if _can_reduce(s) else -1 unl_costs[REDUCE] = _reduce_cost(s, gold_heads) if _can_reduce(s) else -1
#s0_buff_head = head_in_buffer(s, get_s0(s), gold_heads) guess.cost = unl_costs[guess.move]
#s0_stack_head = head_in_stack(s, get_s0(s), gold_heads) cdef Transition t
#s0_buff_kids = children_in_buffer(s, get_s0(s), gold_heads) cdef int target_label
#s0_stack_kids = children_in_stack(s, get_s0(s), gold_heads) cdef int i
if gold_heads[s.stack[0]] == s.i:
target_label = gold_labels[s.stack[0]]
if guess.move == LEFT:
guess.cost += guess.label != target_label
for i in range(self.n_moves):
t = self._moves[i]
if t.move == LEFT and t.label == target_label:
return t
elif gold_heads[s.i] == s.stack[0]:
target_label = gold_labels[s.i]
if guess.move == RIGHT:
guess.cost += guess.label != target_label
for i in range(self.n_moves):
t = self._moves[i]
if t.move == RIGHT and t.label == target_label:
return t
#n0_buff_head = head_in_buffer(s, get_n0(s), gold_heads)
#n0_stack_head = head_in_stack(s, get_n0(s), gold_heads)
#n0_buff_kids = children_in_buffer(s, get_n0(s), gold_heads)
#n0_stack_kids = children_in_buffer(s, get_n0(s), gold_heads)
cdef int cost
cdef int move
cdef int label
cdef int best = -1 cdef int best = -1
cdef weight_t score = -9000 cdef weight_t score = -9000
cdef int i
for i in range(self.n_moves): for i in range(self.n_moves):
move = self._moves[i].move t = self._moves[i]
label = self._moves[i].label if unl_costs[t.move] == 0 and (best == -1 or scores[i] > score):
if unl_costs[move] == 0: best = i
if move == SHIFT or move == REDUCE: score = scores[i]
cost = 0 t = self._moves[best]
elif move == LEFT: t.score = score
if gold_heads[s.stack[0]] == s.i and gold_labels[s.stack[0]] != -1: assert best >= 0
cost = label != gold_labels[s.stack[0]] return t
else:
cost = 0
elif move == RIGHT:
if gold_heads[s.i] == s.stack[0] and gold_labels[s.i] != -1:
cost = label != gold_labels[s.i]
else:
cost = 0
else:
raise StandardError("Unknown Move")
if cost == 0 and (best == -1 or scores[i] > score):
best = i
score = scores[i]
if best < 0:
print unl_costs[SHIFT], unl_costs[REDUCE], unl_costs[LEFT], unl_costs[RIGHT]
print s.stack_len
print has_head(get_s0(s))
print s.sent[s.stack[0]].head
print s.stack[0], s.i
print gold_heads[s.stack[0]], gold_heads[s.i]
print gold_labels[s.i]
print children_in_buffer(s, s.stack[0], gold_heads)
print head_in_buffer(s, s.stack[0], gold_heads)
raise StandardError
return best

View File

@ -24,7 +24,7 @@ from thinc.learner cimport LinearModel
from ..tokens cimport Tokens, TokenC from ..tokens cimport Tokens, TokenC
from .arc_eager cimport TransitionSystem from .arc_eager cimport TransitionSystem, Transition
from ._state cimport init_state, State, is_final, get_idx, get_s0, get_s1 from ._state cimport init_state, State, is_final, get_idx, get_s0, get_s1
@ -70,7 +70,7 @@ cdef class GreedyParser:
cdef: cdef:
Feature* feats Feature* feats
const weight_t* scores const weight_t* scores
int guess Transition guess
cdef atom_t[CONTEXT_SIZE] context cdef atom_t[CONTEXT_SIZE] context
cdef int n_feats cdef int n_feats
@ -81,13 +81,15 @@ cdef class GreedyParser:
feats = self.extractor.get_feats(context, &n_feats) feats = self.extractor.get_feats(context, &n_feats)
scores = self.model.get_scores(feats, n_feats) scores = self.model.get_scores(feats, n_feats)
guess = self.moves.best_valid(scores, state) guess = self.moves.best_valid(scores, state)
self.moves.transition(state, guess) self.moves.transition(state, &guess)
return 0 return 0
def train_sent(self, Tokens tokens, list gold_heads, list gold_labels): def train_sent(self, Tokens tokens, list gold_heads, list gold_labels):
cdef: cdef:
Feature* feats Feature* feats
weight_t* scores weight_t* scores
Transition guess
Transition gold
cdef int n_feats cdef int n_feats
cdef atom_t[CONTEXT_SIZE] context cdef atom_t[CONTEXT_SIZE] context
@ -105,17 +107,18 @@ cdef class GreedyParser:
feats = self.extractor.get_feats(context, &n_feats) feats = self.extractor.get_feats(context, &n_feats)
scores = self.model.get_scores(feats, n_feats) scores = self.model.get_scores(feats, n_feats)
guess = self.moves.best_valid(scores, state) guess = self.moves.best_valid(scores, state)
best = self.moves.best_gold(scores, state, heads_array, labels_array) best = self.moves.best_gold(&guess, scores, state, heads_array, labels_array)
counts = _get_counts(guess, best, feats, n_feats) counts = _get_counts(guess.clas, best.clas, feats, n_feats, guess.cost)
self.model.update(counts) self.model.update(counts)
self.moves.transition(state, guess) self.moves.transition(state, &guess)
cdef int n_corr = 0 cdef int n_corr = 0
for i in range(tokens.length): for i in range(tokens.length):
n_corr += (i + state.sent[i].head) == gold_heads[i] n_corr += (i + state.sent[i].head) == gold_heads[i]
return n_corr return n_corr
cdef dict _get_counts(int guess, int best, const Feature* feats, const int n_feats): cdef dict _get_counts(int guess, int best, const Feature* feats, const int n_feats,
int inc):
if guess == best: if guess == best:
return {} return {}
@ -125,10 +128,10 @@ cdef dict _get_counts(int guess, int best, const Feature* feats, const int n_fea
for i in range(n_feats): for i in range(n_feats):
key = (feats[i].i, feats[i].key) key = (feats[i].i, feats[i].key)
if key in gold_counts: if key in gold_counts:
gold_counts[key] += feats[i].value gold_counts[key] += (feats[i].value * inc)
guess_counts[key] -= feats[i].value guess_counts[key] -= (feats[i].value * inc)
else: else:
gold_counts[key] = feats[i].value gold_counts[key] = (feats[i].value * inc)
guess_counts[key] = -feats[i].value guess_counts[key] = -(feats[i].value * inc)
return {guess: guess_counts, best: gold_counts} return {guess: guess_counts, best: gold_counts}