mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 16:07:41 +03:00 
			
		
		
		
	Improve dynamic oracle when values are missing in parse
This commit is contained in:
		
							parent
							
								
									eddc0e0c74
								
							
						
					
					
						commit
						f56bd4736b
					
				|  | @ -58,9 +58,9 @@ cdef weight_t push_cost(StateClass stcls, const GoldParseC* gold, int target) no | ||||||
|     cdef int i, S_i |     cdef int i, S_i | ||||||
|     for i in range(stcls.stack_depth()): |     for i in range(stcls.stack_depth()): | ||||||
|         S_i = stcls.S(i) |         S_i = stcls.S(i) | ||||||
|         if gold.heads[target] == S_i: |         if gold.has_dep[target] and gold.heads[target] == S_i: | ||||||
|             cost += 1 |             cost += 1 | ||||||
|         if gold.heads[S_i] == target and (NON_MONOTONIC or not stcls.has_head(S_i)): |         if gold.has_dep[S_i] and gold.heads[S_i] == target and (NON_MONOTONIC or not stcls.has_head(S_i)): | ||||||
|             cost += 1 |             cost += 1 | ||||||
|         if BINARY_COSTS and cost >= 1: |         if BINARY_COSTS and cost >= 1: | ||||||
|             return cost |             return cost | ||||||
|  | @ -73,10 +73,12 @@ cdef weight_t pop_cost(StateClass stcls, const GoldParseC* gold, int target) nog | ||||||
|     cdef int i, B_i |     cdef int i, B_i | ||||||
|     for i in range(stcls.buffer_length()): |     for i in range(stcls.buffer_length()): | ||||||
|         B_i = stcls.B(i) |         B_i = stcls.B(i) | ||||||
|  |         if gold.has_dep[B_i]: | ||||||
|             cost += gold.heads[B_i] == target |             cost += gold.heads[B_i] == target | ||||||
|         cost += gold.heads[target] == B_i |  | ||||||
|             if gold.heads[B_i] == B_i or gold.heads[B_i] < target: |             if gold.heads[B_i] == B_i or gold.heads[B_i] < target: | ||||||
|                 break |                 break | ||||||
|  |         if gold.has_dep[target]: | ||||||
|  |             cost += gold.heads[target] == B_i | ||||||
|         if BINARY_COSTS and cost >= 1: |         if BINARY_COSTS and cost >= 1: | ||||||
|             return cost |             return cost | ||||||
|     if Break.is_valid(stcls.c, 0) and Break.move_cost(stcls, gold) == 0: |     if Break.is_valid(stcls.c, 0) and Break.move_cost(stcls, gold) == 0: | ||||||
|  | @ -107,6 +109,9 @@ cdef bint arc_is_gold(const GoldParseC* gold, int head, int child) nogil: | ||||||
| 
 | 
 | ||||||
| cdef bint label_is_gold(const GoldParseC* gold, int head, int child, attr_t label) nogil: | cdef bint label_is_gold(const GoldParseC* gold, int head, int child, attr_t label) nogil: | ||||||
|     if not gold.has_dep[child]: |     if not gold.has_dep[child]: | ||||||
|  |         if label == SUBTOK_LABEL: | ||||||
|  |             return False | ||||||
|  |         else: | ||||||
|             return True |             return True | ||||||
|     elif label == 0: |     elif label == 0: | ||||||
|         return True |         return True | ||||||
|  | @ -167,7 +172,7 @@ cdef class Reduce: | ||||||
|             # Decrement cost for the arcs e save |             # Decrement cost for the arcs e save | ||||||
|             for i in range(1, st.stack_depth()): |             for i in range(1, st.stack_depth()): | ||||||
|                 S_i = st.S(i) |                 S_i = st.S(i) | ||||||
|                 if gold.heads[st.S(0)] == S_i: |                 if gold.has_dep[st.S(0)] and gold.heads[st.S(0)] == S_i: | ||||||
|                     cost -= 1 |                     cost -= 1 | ||||||
|                 if gold.heads[S_i] == st.S(0): |                 if gold.heads[S_i] == st.S(0): | ||||||
|                     cost -= 1 |                     cost -= 1 | ||||||
|  | @ -208,7 +213,9 @@ cdef class LeftArc: | ||||||
|             # Account for deps we might lose between S0 and stack |             # Account for deps we might lose between S0 and stack | ||||||
|             if not s.has_head(s.S(0)): |             if not s.has_head(s.S(0)): | ||||||
|                 for i in range(1, s.stack_depth()): |                 for i in range(1, s.stack_depth()): | ||||||
|  |                     if gold.has_dep[s.S(i)]: | ||||||
|                         cost += gold.heads[s.S(i)] == s.S(0) |                         cost += gold.heads[s.S(i)] == s.S(0) | ||||||
|  |                     if gold.has_dep[s.S(0)]: | ||||||
|                         cost += gold.heads[s.S(0)] == s.S(i) |                         cost += gold.heads[s.S(0)] == s.S(i) | ||||||
|             return cost + pop_cost(s, gold, s.S(0)) + arc_cost(s, gold, s.B(0), s.S(0)) |             return cost + pop_cost(s, gold, s.S(0)) + arc_cost(s, gold, s.B(0), s.S(0)) | ||||||
| 
 | 
 | ||||||
|  | @ -284,18 +291,20 @@ cdef class Break: | ||||||
|             S_i = s.S(i) |             S_i = s.S(i) | ||||||
|             for j in range(s.buffer_length()): |             for j in range(s.buffer_length()): | ||||||
|                 B_i = s.B(j) |                 B_i = s.B(j) | ||||||
|  |                 if gold.has_dep[S_i]: | ||||||
|                     cost += gold.heads[S_i] == B_i |                     cost += gold.heads[S_i] == B_i | ||||||
|  |                 if gold.has_dep[B_i]: | ||||||
|                     cost += gold.heads[B_i] == S_i |                     cost += gold.heads[B_i] == S_i | ||||||
|                 if cost != 0: |                 if cost != 0: | ||||||
|                     return cost |                     return cost | ||||||
|         # Check for sentence boundary --- if it's here, we can't have any deps |         # Check for sentence boundary --- if it's here, we can't have any deps | ||||||
|         # between stack and buffer, so rest of action is irrelevant. |         # between stack and buffer, so rest of action is irrelevant. | ||||||
|         s0_root = _get_root(s.S(0), gold) |         if not gold.has_dep[s.S(0)] or not gold.has_dep[s.B(0)]: | ||||||
|         b0_root = _get_root(s.B(0), gold) |  | ||||||
|         if s0_root != b0_root or s0_root == -1 or b0_root == -1: |  | ||||||
|             return cost |             return cost | ||||||
|  |         if gold.sent_start[s.B_(0).l_edge] == -1: | ||||||
|  |             return cost+1 | ||||||
|         else: |         else: | ||||||
|             return cost + 1 |             return cost | ||||||
| 
 | 
 | ||||||
|     @staticmethod |     @staticmethod | ||||||
|     cdef inline weight_t label_cost(StateClass s, const GoldParseC* gold, attr_t label) nogil: |     cdef inline weight_t label_cost(StateClass s, const GoldParseC* gold, attr_t label) nogil: | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user