mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-14 19:46:26 +03:00
* Work on greedy parser
This commit is contained in:
parent
95ccea03b2
commit
d524dd306a
File diff suppressed because it is too large
Load Diff
|
@ -17,6 +17,14 @@ from ._state cimport get_left, get_right
|
||||||
|
|
||||||
|
|
||||||
cdef inline void fill_token(atom_t* context, const TokenC* token) nogil:
|
cdef inline void fill_token(atom_t* context, const TokenC* token) nogil:
|
||||||
|
if token is NULL:
|
||||||
|
context[0] = 0
|
||||||
|
context[1] = 0
|
||||||
|
context[2] = 0
|
||||||
|
context[3] = 0
|
||||||
|
context[4] = 0
|
||||||
|
context[5] = 0
|
||||||
|
else:
|
||||||
context[0] = token.lex.sic
|
context[0] = token.lex.sic
|
||||||
context[1] = token.pos
|
context[1] = token.pos
|
||||||
context[2] = token.lex.cluster
|
context[2] = token.lex.cluster
|
||||||
|
@ -40,30 +48,29 @@ cdef inline void fill_token(atom_t* context, const TokenC* token) nogil:
|
||||||
cdef int fill_context(atom_t* context, State* state) except -1:
|
cdef int fill_context(atom_t* context, State* state) except -1:
|
||||||
# This fills in the basic properties of each of our "slot" tokens, e.g.
|
# This fills in the basic properties of each of our "slot" tokens, e.g.
|
||||||
# word on top of the stack, word at the front of the buffer, etc.
|
# word on top of the stack, word at the front of the buffer, etc.
|
||||||
cdef TokenC* n1 = get_n1(state)
|
|
||||||
fill_token(&context[S2w], get_s2(state))
|
fill_token(&context[S2w], get_s2(state))
|
||||||
fill_token(&context[S1w], get_s1(state))
|
fill_token(&context[S1w], get_s1(state))
|
||||||
#fill_token(&context[S1rw], get_right(state, get_s1(state), 0))
|
fill_token(&context[S1rw], get_right(state, get_s1(state), 1))
|
||||||
fill_token(&context[S0lw], get_left(state, get_s0(state), 0))
|
fill_token(&context[S0lw], get_left(state, get_s0(state), 1))
|
||||||
fill_token(&context[S0l2w], get_left(state, get_s0(state), 1))
|
fill_token(&context[S0l2w], get_left(state, get_s0(state), 2))
|
||||||
fill_token(&context[S0w], get_s0(state))
|
fill_token(&context[S0w], get_s0(state))
|
||||||
#fill_token(&context[S0r2w], get_right(state, get_s0(state), 1))
|
fill_token(&context[S0r2w], get_right(state, get_s0(state), 2))
|
||||||
fill_token(&context[S0rw], get_right(state, get_s0(state), 0))
|
fill_token(&context[S0rw], get_right(state, get_s0(state), 1))
|
||||||
#fill_token(&context[N0lw], get_left(state, get_n0(state), 0))
|
fill_token(&context[N0lw], get_left(state, get_n0(state), 0))
|
||||||
#fill_token(&context[N0l2w], get_left(state, get_n0(state), 1))
|
fill_token(&context[N0l2w], get_left(state, get_n0(state), 1))
|
||||||
fill_token(&context[N0w], get_n0(state))
|
fill_token(&context[N0w], get_n0(state))
|
||||||
#fill_token(&context[N1w], get_n1(state))
|
fill_token(&context[N1w], get_n1(state))
|
||||||
#fill_token(&context[N2w], get_n2(state))
|
fill_token(&context[N2w], get_n2(state))
|
||||||
|
|
||||||
#if state.stack_len >= 1:
|
if state.stack_len >= 1:
|
||||||
# context[dist] = state.stack[0] - state.sent
|
context[dist] = state.stack[0] - state.i
|
||||||
#else:
|
else:
|
||||||
# context[dist] = 0
|
context[dist] = 0
|
||||||
#context[N0lv] = 0
|
context[N0lv] = 0
|
||||||
#context[S0lv] = 0
|
context[S0lv] = 0
|
||||||
#context[S0rv] = 0
|
context[S0rv] = 0
|
||||||
#context[S1lv] = 0
|
context[S1lv] = 0
|
||||||
#context[S1rv] = 0
|
context[S1rv] = 0
|
||||||
|
|
||||||
|
|
||||||
arc_eager = (
|
arc_eager = (
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -7,15 +7,16 @@ from ..tokens cimport TokenC
|
||||||
|
|
||||||
cdef struct State:
|
cdef struct State:
|
||||||
TokenC* sent
|
TokenC* sent
|
||||||
|
int* stack
|
||||||
int i
|
int i
|
||||||
int sent_len
|
int sent_len
|
||||||
int stack_len
|
int stack_len
|
||||||
|
|
||||||
|
|
||||||
cdef int add_dep(State *s, TokenC* head, TokenC* child, int label) except -1
|
cdef int add_dep(const State *s, const int head, const int child, const int label) except -1
|
||||||
|
|
||||||
|
|
||||||
cdef TokenC* pop_stack(State *s) except NULL
|
cdef int pop_stack(State *s) except -1
|
||||||
cdef int push_stack(State *s) except -1
|
cdef int push_stack(State *s) except -1
|
||||||
|
|
||||||
|
|
||||||
|
@ -32,33 +33,35 @@ cdef inline TokenC* get_n0(const State* s) nogil:
|
||||||
|
|
||||||
|
|
||||||
cdef inline TokenC* get_n1(const State* s) nogil:
|
cdef inline TokenC* get_n1(const State* s) nogil:
|
||||||
if s.i < (s.sent_len - 1):
|
if (s.i+1) >= s.sent_len:
|
||||||
return &s.sent[s.i+1]
|
return NULL
|
||||||
else:
|
else:
|
||||||
return s.sent - 1
|
return &s.sent[s.i+1]
|
||||||
|
|
||||||
|
|
||||||
cdef inline TokenC* get_n2(const State* s) nogil:
|
cdef inline TokenC* get_n2(const State* s) nogil:
|
||||||
|
if (s.i + 2) >= s.sent_len:
|
||||||
|
return NULL
|
||||||
|
else:
|
||||||
return &s.sent[s.i+2]
|
return &s.sent[s.i+2]
|
||||||
|
|
||||||
|
|
||||||
cdef inline TokenC* get_s0(const State *s) nogil:
|
cdef inline TokenC* get_s0(const State *s) nogil:
|
||||||
return s.stack[0]
|
return &s.sent[s.stack[0]]
|
||||||
|
|
||||||
|
|
||||||
cdef inline TokenC* get_s1(const State *s) nogil:
|
cdef inline TokenC* get_s1(const State *s) nogil:
|
||||||
# Rely on our padding to ensure we don't go out of bounds here
|
# Rely on our padding to ensure we don't go out of bounds here
|
||||||
cdef TokenC** s1 = s.stack - 1
|
return &s.sent[s.stack[-1]]
|
||||||
return s1[0]
|
|
||||||
|
|
||||||
|
|
||||||
cdef inline TokenC* get_s2(const State *s) nogil:
|
cdef inline TokenC* get_s2(const State *s) nogil:
|
||||||
# Rely on our padding to ensure we don't go out of bounds here
|
# Rely on our padding to ensure we don't go out of bounds here
|
||||||
cdef TokenC** s2 = s.stack - 2
|
return &s.sent[s.stack[-2]]
|
||||||
return s2[0]
|
|
||||||
|
|
||||||
cdef TokenC* get_right(State* s, TokenC* head, int idx) nogil
|
cdef const TokenC* get_right(const State* s, const TokenC* head, const int idx) nogil
|
||||||
cdef TokenC* get_left(State* s, TokenC* head, int idx) nogil
|
|
||||||
|
cdef const TokenC* get_left(const State* s, const TokenC* head, const int idx) nogil
|
||||||
|
|
||||||
cdef inline bint at_eol(const State *s) nogil:
|
cdef inline bint at_eol(const State *s) nogil:
|
||||||
return s.i >= s.sent_len
|
return s.i >= s.sent_len
|
||||||
|
@ -68,10 +71,10 @@ cdef inline bint is_final(const State *s) nogil:
|
||||||
return at_eol(s) # The stack will be attached to root anyway
|
return at_eol(s) # The stack will be attached to root anyway
|
||||||
|
|
||||||
|
|
||||||
cdef int children_in_buffer(const State *s, const TokenC* target, list gold) except -1
|
cdef int children_in_buffer(const State *s, const int head, list gold) except -1
|
||||||
cdef int head_in_buffer(const State *s, const TokenC* target, list gold) except -1
|
cdef int head_in_buffer(const State *s, const int child, list gold) except -1
|
||||||
cdef int children_in_stack(const State *s, const TokenC* target, list gold) except -1
|
cdef int children_in_stack(const State *s, const int head, list gold) except -1
|
||||||
cdef int head_in_stack(const State *s, const TokenC*, list gold) except -1
|
cdef int head_in_stack(const State *s, const int child, list gold) except -1
|
||||||
|
|
||||||
cdef State* init_state(Pool mem, TokenC* sent, const int sent_length) except NULL
|
cdef State* init_state(Pool mem, TokenC* sent, const int sent_length) except NULL
|
||||||
|
|
||||||
|
@ -81,5 +84,7 @@ cdef inline uint32_t _nth_significant_bit(uint32_t bits, int n) nogil:
|
||||||
cdef int i
|
cdef int i
|
||||||
for i in range(32):
|
for i in range(32):
|
||||||
if bits & (1 << i):
|
if bits & (1 << i):
|
||||||
|
n -= 1
|
||||||
|
if n < 1:
|
||||||
return i
|
return i
|
||||||
return 0
|
return 0
|
||||||
|
|
|
@ -5,104 +5,82 @@ from cymem.cymem cimport Pool
|
||||||
from ..lexeme cimport EMPTY_LEXEME
|
from ..lexeme cimport EMPTY_LEXEME
|
||||||
|
|
||||||
|
|
||||||
cdef int add_dep(State *s, TokenC* head, TokenC* child, int label) except -1:
|
cdef int add_dep(State *s, int head, int child, int label) except -1:
|
||||||
child.head = head - child
|
s.sent[child].head = head - child
|
||||||
child.dep_tag = label
|
s.sent[child].dep_tag = label
|
||||||
# Keep a bit-vector tracking child dependencies. If a word has a child at
|
# Keep a bit-vector tracking child dependencies. If a word has a child at
|
||||||
# offset i from it, set that bit (tracking left and right separately)
|
# offset i from it, set that bit (tracking left and right separately)
|
||||||
if child > head:
|
if child > head:
|
||||||
head.r_kids |= 1 << child.head
|
s.sent[head].r_kids |= 1 << (-s.sent[child].head)
|
||||||
else:
|
else:
|
||||||
head.l_kids |= 1 << (-child.head)
|
s.sent[head].l_kids |= 1 << s.sent[child].head
|
||||||
|
|
||||||
|
|
||||||
cdef TokenC* pop_stack(State *s) except NULL:
|
cdef int pop_stack(State *s) except -1:
|
||||||
assert s.stack_len >= 1
|
assert s.stack_len >= 1
|
||||||
cdef TokenC* top = s.stack[0]
|
|
||||||
s.stack -= 1
|
|
||||||
s.stack_len -= 1
|
s.stack_len -= 1
|
||||||
return top
|
s.stack -= 1
|
||||||
|
|
||||||
|
|
||||||
cdef int push_stack(State *s) except -1:
|
cdef int push_stack(State *s) except -1:
|
||||||
assert s.i < s.sent_len
|
assert s.i < s.sent_len
|
||||||
s.stack += 1
|
s.stack += 1
|
||||||
s.stack[0] = &s.sent[s.i]
|
s.stack[0] = s.i
|
||||||
s.stack_len += 1
|
s.stack_len += 1
|
||||||
s.i += 1
|
s.i += 1
|
||||||
|
|
||||||
|
|
||||||
cdef int children_in_buffer(const State *s, const TokenC* target, list gold) except -1:
|
cdef int children_in_buffer(const State *s, int head, list gold) except -1:
|
||||||
# Golds holds an array of head offsets --- the head of word i is i - golds[i]
|
# Golds holds an array of head offsets --- the head of word i is i - golds[i]
|
||||||
# Iterate over the tokens of the queue, and check whether their gold head is
|
# Iterate over the tokens of the queue, and check whether their gold head is
|
||||||
# our target
|
# our target
|
||||||
cdef int i
|
cdef int i
|
||||||
cdef int n = 0
|
cdef int n = 0
|
||||||
cdef TokenC* buff_word
|
|
||||||
cdef TokenC* buff_head
|
|
||||||
cdef int buff_word_head_offset
|
|
||||||
for i in range(s.i, s.sent_len):
|
for i in range(s.i, s.sent_len):
|
||||||
buff_word = &s.sent[i]
|
if gold[i] == head:
|
||||||
buff_word_head_offset = gold[i]
|
|
||||||
buff_head = buff_word + buff_word_head_offset
|
|
||||||
if buff_head == target:
|
|
||||||
n += 1
|
n += 1
|
||||||
return n
|
return n
|
||||||
|
|
||||||
|
|
||||||
cdef int head_in_buffer(const State *s, const TokenC* target, list gold) except -1:
|
cdef int head_in_buffer(const State *s, const int child, list gold) except -1:
|
||||||
cdef int target_idx = get_idx(s, target)
|
return gold[child] >= s.i
|
||||||
cdef int target_head_idx = target_idx + gold[target_idx]
|
|
||||||
return target_head_idx >= s.i
|
|
||||||
|
|
||||||
|
|
||||||
cdef int children_in_stack(const State *s, const TokenC* target, list gold) except -1:
|
cdef int children_in_stack(const State *s, const int head, list gold) except -1:
|
||||||
if s.stack_len == 0:
|
|
||||||
return 0
|
|
||||||
cdef int i
|
cdef int i
|
||||||
cdef int n = 0
|
cdef int n = 0
|
||||||
cdef const TokenC* stack_word
|
|
||||||
cdef const TokenC* stack_word_head
|
|
||||||
cdef int stack_word_head_offset
|
|
||||||
for i in range(s.stack_len):
|
for i in range(s.stack_len):
|
||||||
stack_word = (s.stack - i)[0]
|
if gold[s.stack[-i]] == head:
|
||||||
stack_word_head_offset = gold[get_idx(s, stack_word)]
|
|
||||||
stack_word_head = (s.stack + stack_word_head_offset)[0]
|
|
||||||
if stack_word_head == target:
|
|
||||||
n += 1
|
n += 1
|
||||||
return n
|
return n
|
||||||
|
|
||||||
|
|
||||||
cdef int head_in_stack(const State *s, const TokenC* target, list gold) except -1:
|
cdef int head_in_stack(const State *s, const int child, list gold) except -1:
|
||||||
if s.stack_len == 0:
|
|
||||||
return 0
|
|
||||||
cdef int head_offset = gold[get_idx(s, target)]
|
|
||||||
cdef const TokenC* target_head = target + head_offset
|
|
||||||
cdef int i
|
cdef int i
|
||||||
for i in range(s.stack_len):
|
for i in range(s.stack_len):
|
||||||
if target_head == (s.stack - i)[0]:
|
if gold[child] == s.stack[-i]:
|
||||||
return 1
|
return 1
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
cdef TokenC* get_left(State* s, TokenC* head, int idx) nogil:
|
cdef const TokenC* get_left(const State* s, const TokenC* head, const int idx) nogil:
|
||||||
cdef uint32_t kids = head.l_kids
|
cdef uint32_t kids = head.l_kids
|
||||||
if kids == 0:
|
if kids == 0:
|
||||||
return s.sent - 1
|
return NULL
|
||||||
cdef int offset = _nth_significant_bit(kids, idx)
|
cdef int offset = _nth_significant_bit(kids, idx)
|
||||||
cdef TokenC* child = head - offset
|
cdef const TokenC* child = head - offset
|
||||||
if child >= s.sent:
|
if child >= s.sent:
|
||||||
return child
|
return child
|
||||||
else:
|
else:
|
||||||
return s.sent - 1
|
return s.sent - 1
|
||||||
|
|
||||||
|
|
||||||
cdef TokenC* get_right(State* s, TokenC* head, int idx) nogil:
|
cdef const TokenC* get_right(const State* s, const TokenC* head, const int idx) nogil:
|
||||||
cdef uint32_t kids = head.r_kids
|
cdef uint32_t kids = head.r_kids
|
||||||
if kids == 0:
|
if kids == 0:
|
||||||
return s.sent - 1
|
return NULL
|
||||||
cdef int offset = _nth_significant_bit(kids, idx)
|
cdef int offset = _nth_significant_bit(kids, idx)
|
||||||
cdef TokenC* child = head + offset
|
cdef const TokenC* child = head + offset
|
||||||
if child < (s.sent + s.sent_len):
|
if child < (s.sent + s.sent_len):
|
||||||
return child
|
return child
|
||||||
else:
|
else:
|
||||||
|
@ -115,13 +93,11 @@ DEF PADDING = 5
|
||||||
cdef State* init_state(Pool mem, TokenC* sent, const int sent_length) except NULL:
|
cdef State* init_state(Pool mem, TokenC* sent, const int sent_length) except NULL:
|
||||||
cdef int padded_len = sent_length + PADDING + PADDING
|
cdef int padded_len = sent_length + PADDING + PADDING
|
||||||
cdef State* s = <State*>mem.alloc(1, sizeof(State))
|
cdef State* s = <State*>mem.alloc(1, sizeof(State))
|
||||||
s.stack = <TokenC**>mem.alloc(padded_len, sizeof(TokenC*))
|
s.stack = <int*>mem.alloc(padded_len, sizeof(int))
|
||||||
cdef TokenC* eol_token = sent - 1
|
|
||||||
for i in range(PADDING):
|
for i in range(PADDING):
|
||||||
# sent should be padded, with a suitable sentinel token here
|
s.stack[i] = -1
|
||||||
s.stack[0] = eol_token
|
s.stack += (PADDING - 1)
|
||||||
s.stack += 1
|
assert s.stack[0] == -1
|
||||||
s.stack[0] = eol_token
|
|
||||||
s.sent = sent
|
s.sent = sent
|
||||||
s.stack_len = 0
|
s.stack_len = 0
|
||||||
s.i = 0
|
s.i = 0
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -35,43 +35,35 @@ cdef inline bint _can_reduce(const State* s) nogil:
|
||||||
cdef int _shift_cost(const State* s, list gold) except -1:
|
cdef int _shift_cost(const State* s, list gold) except -1:
|
||||||
assert not at_eol(s)
|
assert not at_eol(s)
|
||||||
cost = 0
|
cost = 0
|
||||||
cost += head_in_stack(s, get_n0(s), gold)
|
cost += head_in_stack(s, s.i, gold)
|
||||||
cost += children_in_stack(s, get_n0(s), gold)
|
cost += children_in_stack(s, s.i, gold)
|
||||||
return cost
|
return cost
|
||||||
|
|
||||||
|
|
||||||
cdef int _right_cost(const State* s, list gold) except -1:
|
cdef int _right_cost(const State* s, list gold) except -1:
|
||||||
assert s.stack_len >= 1
|
assert s.stack_len >= 1
|
||||||
cdef int s0_idx = get_idx(s, get_s0(s))
|
|
||||||
cost = 0
|
cost = 0
|
||||||
if _gold_dep(s, get_s0(s), get_n0(s), gold):
|
if gold[s.i] == s.stack[0]:
|
||||||
return cost
|
return cost
|
||||||
cost += head_in_buffer(s, get_n0(s), gold)
|
cost += head_in_buffer(s, s.i, gold)
|
||||||
cost += children_in_stack(s, get_n0(s), gold)
|
cost += children_in_stack(s, s.i, gold)
|
||||||
cost += head_in_stack(s, get_n0(s), gold)
|
cost += head_in_stack(s, s.i, gold)
|
||||||
return cost
|
return cost
|
||||||
|
|
||||||
|
|
||||||
cdef int _left_cost(const State* s, list gold) except -1:
|
cdef int _left_cost(const State* s, list gold) except -1:
|
||||||
assert s.stack_len >= 1
|
assert s.stack_len >= 1
|
||||||
cost = 0
|
cost = 0
|
||||||
if _gold_dep(s, get_n0(s), get_s0(s), gold):
|
if gold[s.stack[0]] == s.i:
|
||||||
return cost
|
return cost
|
||||||
|
|
||||||
cost += head_in_buffer(s, get_s0(s), gold)
|
cost += head_in_buffer(s, s.stack[0], gold)
|
||||||
cost += children_in_buffer(s, get_s0(s), gold)
|
cost += children_in_buffer(s, s.stack[0], gold)
|
||||||
return cost
|
return cost
|
||||||
|
|
||||||
|
|
||||||
cdef int _reduce_cost(const State* s, list gold) except -1:
|
cdef int _reduce_cost(const State* s, list gold) except -1:
|
||||||
return children_in_buffer(s, get_s0(s), gold)
|
return children_in_buffer(s, s.stack[0], gold)
|
||||||
|
|
||||||
|
|
||||||
cdef int _gold_dep(const State* s, const TokenC* head, const TokenC* child,
|
|
||||||
list gold_offsets) except -1:
|
|
||||||
cdef int head_idx = get_idx(s, head)
|
|
||||||
cdef int child_idx = get_idx(s, child)
|
|
||||||
return child_idx + gold_offsets[child_idx] == head_idx
|
|
||||||
|
|
||||||
|
|
||||||
cdef class TransitionSystem:
|
cdef class TransitionSystem:
|
||||||
|
@ -109,10 +101,10 @@ cdef class TransitionSystem:
|
||||||
if t.move == SHIFT:
|
if t.move == SHIFT:
|
||||||
push_stack(s)
|
push_stack(s)
|
||||||
elif t.move == LEFT:
|
elif t.move == LEFT:
|
||||||
add_dep(s, get_n0(s), get_s0(s), t.label)
|
add_dep(s, s.i, s.stack[0], t.label)
|
||||||
pop_stack(s)
|
pop_stack(s)
|
||||||
elif t.move == RIGHT:
|
elif t.move == RIGHT:
|
||||||
add_dep(s, get_s0(s), get_n0(s), t.label)
|
add_dep(s, s.stack[0], s.i, t.label)
|
||||||
push_stack(s)
|
push_stack(s)
|
||||||
elif t.move == REDUCE:
|
elif t.move == REDUCE:
|
||||||
pop_stack(s)
|
pop_stack(s)
|
||||||
|
@ -157,12 +149,12 @@ cdef class TransitionSystem:
|
||||||
if move == SHIFT or move == REDUCE:
|
if move == SHIFT or move == REDUCE:
|
||||||
cost = 0
|
cost = 0
|
||||||
elif move == LEFT:
|
elif move == LEFT:
|
||||||
if _gold_dep(s, get_n0(s), get_s0(s), gold_heads):
|
if gold_heads[s.stack[0]] == s.i:
|
||||||
cost = label != gold_labels[get_idx(s, get_s0(s))]
|
cost = label != gold_labels[s.stack[0]]
|
||||||
else:
|
else:
|
||||||
cost = 0
|
cost = 0
|
||||||
elif move == RIGHT:
|
elif move == RIGHT:
|
||||||
if _gold_dep(s, get_s0(s), get_n0(s), gold_heads):
|
if gold_heads[s.i] == s.stack[0]:
|
||||||
cost = label != gold_labels[s.i]
|
cost = label != gold_labels[s.i]
|
||||||
else:
|
else:
|
||||||
cost = 0
|
cost = 0
|
||||||
|
@ -173,24 +165,14 @@ cdef class TransitionSystem:
|
||||||
score = scores[i]
|
score = scores[i]
|
||||||
|
|
||||||
if best < 0:
|
if best < 0:
|
||||||
for i in range(self.n_moves):
|
print unl_costs[SHIFT], unl_costs[REDUCE], unl_costs[LEFT], unl_costs[RIGHT]
|
||||||
if self._moves[i].move == LEFT:
|
print s.stack_len
|
||||||
print self._moves[i].label,
|
print has_head(get_s0(s))
|
||||||
print
|
print s.sent[s.stack[0]].head
|
||||||
print _gold_dep(s, get_n0(s), get_s0(s), gold_heads)
|
print s.stack[0], s.i
|
||||||
print gold_labels[get_idx(s, get_s0(s))]
|
print gold_heads[s.stack[0]], gold_heads[s.i]
|
||||||
print unl_costs[LEFT]
|
print gold_labels[s.i]
|
||||||
print "S0:"
|
print children_in_buffer(s, s.stack[0], gold_heads)
|
||||||
print "Head:", gold_heads[get_idx(s, get_s0(s))]
|
print head_in_buffer(s, s.stack[0], gold_heads)
|
||||||
print "h. in b.", head_in_buffer(s, get_s0(s), gold_heads)
|
|
||||||
print "c. in b.", children_in_buffer(s, get_s0(s), gold_heads)
|
|
||||||
print "h. in s.", head_in_stack(s, get_s0(s), gold_heads)
|
|
||||||
print "c. in s.", children_in_stack(s, get_s0(s), gold_heads)
|
|
||||||
print "N0:"
|
|
||||||
print "Head:", gold_heads[get_idx(s, get_n0(s))]
|
|
||||||
print "h. in b.", head_in_buffer(s, get_n0(s), gold_heads)
|
|
||||||
print "c. in b.", children_in_buffer(s, get_n0(s), gold_heads)
|
|
||||||
print "h. in s.", head_in_stack(s, get_n0(s), gold_heads)
|
|
||||||
print "c. in s.", children_in_stack(s, get_n0(s), gold_heads)
|
|
||||||
raise StandardError
|
raise StandardError
|
||||||
return best
|
return best
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -32,9 +32,6 @@ from . import _parse_features
|
||||||
from ._parse_features cimport fill_context, CONTEXT_SIZE
|
from ._parse_features cimport fill_context, CONTEXT_SIZE
|
||||||
|
|
||||||
|
|
||||||
DEF CONTEXT_SIZE = 50
|
|
||||||
|
|
||||||
|
|
||||||
DEBUG = False
|
DEBUG = False
|
||||||
def set_debug(val):
|
def set_debug(val):
|
||||||
global DEBUG
|
global DEBUG
|
||||||
|
@ -43,8 +40,8 @@ def set_debug(val):
|
||||||
|
|
||||||
cdef unicode print_state(State* s, list words):
|
cdef unicode print_state(State* s, list words):
|
||||||
words = list(words) + ['EOL']
|
words = list(words) + ['EOL']
|
||||||
top = words[get_idx(s, get_s0(s))]
|
top = words[s.stack[0]]
|
||||||
second = words[get_idx(s, get_s1(s))]
|
second = words[s.stack[-1]]
|
||||||
n0 = words[s.i]
|
n0 = words[s.i]
|
||||||
n1 = words[s.i + 1]
|
n1 = words[s.i + 1]
|
||||||
return ' '.join((second, top, '|', n0, n1))
|
return ' '.join((second, top, '|', n0, n1))
|
||||||
|
@ -61,7 +58,7 @@ cdef class GreedyParser:
|
||||||
self.extractor = Extractor(get_templates(self.cfg.features))
|
self.extractor = Extractor(get_templates(self.cfg.features))
|
||||||
self.moves = TransitionSystem(self.cfg.left_labels, self.cfg.right_labels)
|
self.moves = TransitionSystem(self.cfg.left_labels, self.cfg.right_labels)
|
||||||
|
|
||||||
self.model = LinearModel(self.moves.n_moves, self.extractor.n_templ)
|
self.model = LinearModel(self.moves.n_moves, self.extractor.n_templ + 10000)
|
||||||
if os.path.exists(pjoin(model_dir, 'model')):
|
if os.path.exists(pjoin(model_dir, 'model')):
|
||||||
self.model.load(pjoin(model_dir, 'model'))
|
self.model.load(pjoin(model_dir, 'model'))
|
||||||
|
|
||||||
|
@ -94,7 +91,12 @@ cdef class GreedyParser:
|
||||||
cdef Pool mem = Pool()
|
cdef Pool mem = Pool()
|
||||||
cdef State* state = init_state(mem, tokens.data, tokens.length)
|
cdef State* state = init_state(mem, tokens.data, tokens.length)
|
||||||
words = [t.string for t in tokens]
|
words = [t.string for t in tokens]
|
||||||
|
if DEBUG:
|
||||||
|
print words
|
||||||
|
print gold_heads
|
||||||
while not is_final(state):
|
while not is_final(state):
|
||||||
|
if DEBUG:
|
||||||
|
print print_state(state, words)
|
||||||
fill_context(context, state)
|
fill_context(context, state)
|
||||||
feats = self.extractor.get_feats(context, &n_feats)
|
feats = self.extractor.get_feats(context, &n_feats)
|
||||||
scores = self.model.get_scores(feats, n_feats)
|
scores = self.model.get_scores(feats, n_feats)
|
||||||
|
@ -109,5 +111,5 @@ cdef class GreedyParser:
|
||||||
cdef int i
|
cdef int i
|
||||||
n_corr = 0
|
n_corr = 0
|
||||||
for i in range(tokens.length):
|
for i in range(tokens.length):
|
||||||
n_corr += state.sent[i].head == gold_heads[i]
|
n_corr += (i + state.sent[i].head) == gold_heads[i]
|
||||||
return n_corr
|
return n_corr
|
||||||
|
|
Loading…
Reference in New Issue
Block a user