* Disable c-parsing transitions

This commit is contained in:
Matthew Honnibal 2015-05-12 22:33:25 +02:00
parent 03a6626545
commit f1e0272b18

View File

@ -297,60 +297,62 @@ cdef int _break_cost(const Transition* self, const State* s, GoldParse gold) exc
# Cost of applying the constituent (bracket) move in state `s` given `gold`.
#
# Returns 9000 when `_can_constituent(s)` rejects the move (presumably a
# prohibitive cost so the move is never chosen -- confirm against the caller).
# Any state that passes the guard raises, because the constituent move is
# meant to be switched off for now.  The `except -1` clause lets Cython
# propagate that exception to the caller.
cdef int _constituent_cost(const Transition* self, const State* s, GoldParse gold) except -1:
    if not _can_constituent(s):
        return 9000
    # Reaching this point means the validity check let the move through even
    # though it should be disabled: fail loudly instead of returning a cost.
    raise Exception("Constituent move should be disabled currently")
    # ---- Disabled implementation, kept for reference ----
    # NOTE(review): before re-enabling, fix the following in the code below:
    #   * `gold.brackets(...)` is called with a default argument; it looks like
    #     a dict-style `.get(key, default)` lookup was intended -- confirm.
    #   * `ctnt` is never defined inside the `for i in range(s.ctnt_len)` loop
    #     (the loop index `i` is unused).
    #   * `if self.label in brackets[child.l_edge]` is missing its colon.
    #   * `s.r_edge` is used where the lookup above uses `get_s0(s).r_edge`.
    #
    # The gold standard is indexed by end, then by start, then a set of labels
    #brackets = gold.brackets(get_s0(s).r_edge, {})
    #if not brackets:
    #    return 2 # 2 loss for bad bracket, only 1 for good bracket bad label
    # Index the current brackets in the state
    #existing = set()
    #for i in range(s.ctnt_len):
    #    if ctnt.end == s.r_edge and ctnt.label == self.label:
    #        existing.add(ctnt.start)
    #cdef int loss = 2
    #cdef const TokenC* child
    #cdef const TokenC* s0 = get_s0(s)
    #cdef int n_left = count_left_kids(s0)
    # Iterate over the possible start positions, and check whether we have a
    # (start, end, label) match to the gold tree
    #for i in range(1, n_left):
    #    child = get_left(s, s0, i)
    #    if child.l_edge in brackets and child.l_edge not in existing:
    #        if self.label in brackets[child.l_edge]
    #            return 0
    #        else:
    #            loss = 1 # If we see the start position, set loss to 1
    #return loss
# Cost of applying the adjust move in state `s` given `gold`.
#
# Returns 9000 when `_can_adjust(s)` rejects the move (presumably a
# prohibitive cost so the move is never chosen -- confirm against the caller).
# Any state that passes the guard raises, because the adjust move is meant to
# be switched off for now.  The `except -1` clause lets Cython propagate that
# exception to the caller.
cdef int _adjust_cost(const Transition* self, const State* s, GoldParse gold) except -1:
    if not _can_adjust(s):
        return 9000
    # Reaching this point means the validity check let the move through even
    # though it should be disabled: fail loudly instead of returning a cost.
    raise Exception("Adjust move should be disabled currently")
    # ---- Disabled implementation, kept for reference ----
    # NOTE(review): before re-enabling, fix the following in the code below:
    #   * `gold.brackets(...)` is called with a default argument; it looks like
    #     a dict-style `.get(key, default)` lookup was intended -- confirm.
    #   * `gold_labels` is assigned but never read.
    #   * `brackets` and `loss` are read but never assigned in this function
    #     (the lookup result is stored in `gold_starts`).
    #
    # The gold standard is indexed by end, then by start, then a set of labels
    #gold_starts = gold.brackets(get_s0(s).r_edge, {})
    # Case 1: There are 0 brackets ending at this word.
    # --> Cost is sunk, but must allow brackets to begin
    #if not gold_starts:
    #    return 0
    # Is the top bracket correct?
    #gold_labels = gold_starts.get(s.ctnt.start, set())
    # TODO: Case where we have a unary rule
    # TODO: Case where two brackets end on this word, with top bracket starting
    # before
    #cdef const TokenC* child
    #cdef const TokenC* s0 = get_s0(s)
    #cdef int n_left = count_left_kids(s0)
    #cdef int i
    # Iterate over the possible start positions, and check whether we have a
    # (start, end, label) match to the gold tree
    #for i in range(1, n_left):
    #    child = get_left(s, s0, i)
    #    if child.l_edge in brackets:
    #        if self.label in brackets[child.l_edge]:
    #            return 0
    #        else:
    #            loss = 1 # If we see the start position, set loss to 1
    #return loss
# Register `_shift_cost` as the cost function stored under the SHIFT slot.
get_cost_funcs[SHIFT] = _shift_cost
@ -403,18 +405,20 @@ cdef inline bint _can_break(const State* s) nogil:
# Validity check for the constituent move.
#
# Always reports the move as invalid: constituent transitions are disabled
# for now.  The previous condition is kept below so it can be restored later.
cdef inline bint _can_constituent(const State* s) nogil:
    return False
    # Previous condition: valid whenever the stack holds at least one item.
    #return s.stack_len >= 1
# Validity check for the adjust move.
#
# Always reports the move as invalid: adjust transitions are disabled for
# now.  The previous implementation is kept below for reference; it returned
# True as soon as a left child of s0 (indices 1 .. n_left-1) had an `l_edge`
# before `s.ctnt.start`, and False (via the loop's `else`) otherwise.
cdef inline bint _can_adjust(const State* s) nogil:
    return False
    # Need a left child to move the bracket to
    #cdef const TokenC* child
    #cdef const TokenC* s0 = get_s0(s)
    #cdef int n_left = count_left_kids(s0)
    #cdef int i
    #for i in range(1, n_left):
    #    child = get_left(s, s0, i)
    #    if child.l_edge < s.ctnt.start:
    #        return True
    #else:
    #    return False