mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 16:07:41 +03:00 
			
		
		
		
	Fix ZERO_PLUS operator
This commit is contained in:
		
							parent
							
								
									9115c3ba0a
								
							
						
					
					
						commit
						1b01685f47
					
				|  | @ -68,13 +68,11 @@ cdef find_matches(TokenPatternC** patterns, int n, Doc doc): | ||||||
|         cache = PreshMap() |         cache = PreshMap() | ||||||
|         nexts.clear() |         nexts.clear() | ||||||
|         for j in range(curr_states.size()): |         for j in range(curr_states.size()): | ||||||
|             action = get_action(curr_states[j], &doc.c[i], extra_attrs[i], cache) |  | ||||||
|             transition(matches, nexts, |             transition(matches, nexts, | ||||||
|                 action, curr_states[j], i) |                 curr_states[j], i, doc, extra_attrs, cache) | ||||||
|         for j in range(init_states.size()): |         for j in range(init_states.size()): | ||||||
|             action = get_action(init_states[j], &doc.c[i], extra_attrs[i], cache) |  | ||||||
|             transition(matches, nexts, |             transition(matches, nexts, | ||||||
|                 action, init_states[j], i) |                 init_states[j], i, doc, extra_attrs, cache) | ||||||
|         nexts, curr_states = curr_states, nexts |         nexts, curr_states = curr_states, nexts | ||||||
|     # Filter out matches that have a longer equivalent. |     # Filter out matches that have a longer equivalent. | ||||||
|     longest_matches = {} |     longest_matches = {} | ||||||
|  | @ -89,19 +87,26 @@ cdef find_matches(TokenPatternC** patterns, int n, Doc doc): | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| cdef void transition(vector[MatchC]& matches, vector[PatternStateC]& nexts, | cdef void transition(vector[MatchC]& matches, vector[PatternStateC]& nexts, | ||||||
|         ActionC action, PatternStateC state, int token) except *: |         PatternStateC state, int token, | ||||||
|  |         Doc doc, const attr_t* const* extra_attrs, PreshMap cache) except *: | ||||||
|  |     action = get_action(state, &doc.c[token], extra_attrs[token], cache) | ||||||
|     if state.start == -1: |     if state.start == -1: | ||||||
|         state.start = token |         state.start = token | ||||||
|     if action.is_match: |     if action.is_match: | ||||||
|         ent_id = state.state[1].attrs.value |         ent_id = state.state[1].attrs.value | ||||||
|         matches.push_back( |         matches.push_back( | ||||||
|             MatchC(pattern_id=ent_id, start=state.start, end=token+1)) |             MatchC(pattern_id=ent_id, start=state.start, end=token+1)) | ||||||
|     if action.keep_state: |  | ||||||
|         nexts.push_back(PatternStateC(start=state.start, state=state.state, |  | ||||||
|             last_action=action)) |  | ||||||
|     if action.advance_state: |     if action.advance_state: | ||||||
|         nexts.push_back(PatternStateC(start=state.start, |         nexts.push_back(PatternStateC(start=state.start, | ||||||
|             state=state.state+1, last_action=action)) |             state=state.state+1, last_action=action)) | ||||||
|  |     cdef PatternStateC next_state | ||||||
|  |     if action.keep_state and token < doc.length: | ||||||
|  |         # Keeping the state must not consume a token, so we call transition | ||||||
|  |         # again on the same token with the next pattern state | ||||||
|  |         next_state = PatternStateC(start=state.start, state=state.state+1, | ||||||
|  |                                    last_action=action) | ||||||
|  |         transition(matches, nexts, next_state, token, doc, extra_attrs, cache) | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| cdef ActionC get_action(PatternStateC state, const TokenC* token, const attr_t* extra_attrs, | cdef ActionC get_action(PatternStateC state, const TokenC* token, const attr_t* extra_attrs, | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user