mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 16:07:41 +03:00 
			
		
		
		
	* Improve features for NER
This commit is contained in:
		
							parent
							
								
									7ecb52c0ed
								
							
						
					
					
						commit
						e181c051d5
					
				|  | @ -187,6 +187,28 @@ cpdef enum: | |||
|     P2_suffix | ||||
|     P2_shape | ||||
|     | ||||
|     E0w | ||||
|     E0W | ||||
|     E0p | ||||
|     E0c | ||||
|     E0c4 | ||||
|     E0c6 | ||||
|     E0L | ||||
|     E0_prefix | ||||
|     E0_suffix | ||||
|     E0_shape | ||||
|     | ||||
|     E1w | ||||
|     E1W | ||||
|     E1p | ||||
|     E1c | ||||
|     E1c4 | ||||
|     E1c6 | ||||
|     E1L | ||||
|     E1_prefix | ||||
|     E1_suffix | ||||
|     E1_shape | ||||
|     | ||||
|     # Misc features at the end | ||||
|     dist | ||||
|     N0lv | ||||
|  |  | |||
|  | @ -13,6 +13,7 @@ from ..tokens cimport TokenC | |||
| from ._state cimport State | ||||
| from ._state cimport get_s2, get_s1, get_s0, get_n0, get_n1, get_n2 | ||||
| from ._state cimport get_p2, get_p1 | ||||
| from ._state cimport get_e0, get_e1 | ||||
| from ._state cimport has_head, get_left, get_right | ||||
| from ._state cimport count_left_kids, count_right_kids | ||||
| 
 | ||||
|  | @ -69,6 +70,8 @@ cdef int fill_context(atom_t* context, State* state) except -1: | |||
|     fill_token(&context[P1w], get_p1(state)) | ||||
|     fill_token(&context[P2w], get_p2(state)) | ||||
| 
 | ||||
|     fill_token(&context[E0w], get_e0(state)) | ||||
|     fill_token(&context[E1w], get_e1(state)) | ||||
|     if state.stack_len >= 1: | ||||
|         context[dist] = state.stack[0] - state.i | ||||
|     else: | ||||
|  | @ -135,6 +138,32 @@ ner = ( | |||
| 
 | ||||
|     (P1c, N0c), | ||||
|     (N0c, N1c), | ||||
| 
 | ||||
|     (E0w,), | ||||
|     (E0c,), | ||||
|     (E0p,), | ||||
| 
 | ||||
|     (E0w, N0w), | ||||
|     (E0c, N0w), | ||||
|     (E0p, N0w), | ||||
| 
 | ||||
|     (E0p, P1p, N0p), | ||||
|     (E0c, P1c, N0c), | ||||
| 
 | ||||
|     (E0w, P1c), | ||||
|     (E0p, P1p), | ||||
|     (E0c, P1c), | ||||
|     (E0p, E1p), | ||||
|     (E0c, P1p), | ||||
| 
 | ||||
|     (E1w,), | ||||
|     (E1c,), | ||||
|     (E1p,), | ||||
| 
 | ||||
|     (E0w, E1w), | ||||
|     (E0w, E1p,), | ||||
|     (E0p, E1w,), | ||||
|     (E0p, E1w), | ||||
| ) | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
|  | @ -54,6 +54,19 @@ cdef inline TokenC* get_p2(const State* s) nogil: | |||
|         return &s.sent[s.i-2] | ||||
| 
 | ||||
| 
 | ||||
# get_e0: return a pointer to the token at index s.ent.start of s.sent,
# or NULL when s.ent.end is non-zero.
# NOTE(review): presumably s.ent.end == 0 marks an entity that is still open,
# making this the first token of the current entity -- confirm against the
# State/entity struct definition, which is not shown here.
| cdef inline TokenC* get_e0(const State* s) nogil: | ||||
|     if s.ent.end != 0: | ||||
|         return NULL | ||||
|     else: | ||||
|         return &s.sent[s.ent.start] | ||||
| 
 | ||||
| 
 | ||||
# get_e1: return a pointer to the token at index s.ent.start + 1 of s.sent,
# or NULL when s.ent.end is non-zero or s.ent.start >= s.i + 1.
# NOTE(review): when s.ent.start == s.i the guard passes and the returned
# pointer addresses s.sent[s.i + 1]; there is no comparison against
# s.sent_len here (get_n2 does compare against it) -- confirm the index is
# always in bounds and that a token at s.i + 1 is intended to count as E1.
| cdef inline TokenC* get_e1(const State* s) nogil: | ||||
|     if s.ent.end != 0 or s.ent.start >= (s.i + 1): | ||||
|         return NULL | ||||
|     else: | ||||
|         return &s.sent[s.ent.start + 1] | ||||
| 
 | ||||
| 
 | ||||
| cdef inline TokenC* get_n2(const State* s) nogil: | ||||
|     if (s.i + 2) >= s.sent_len: | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user