mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 16:07:41 +03:00 
			
		
		
		
	* Improve features for NER
This commit is contained in:
		
							parent
							
								
									7ecb52c0ed
								
							
						
					
					
						commit
						e181c051d5
					
				|  | @ -187,6 +187,28 @@ cpdef enum: | ||||||
|     P2_suffix |     P2_suffix | ||||||
|     P2_shape |     P2_shape | ||||||
|     |     | ||||||
|  |     E0w | ||||||
|  |     E0W | ||||||
|  |     E0p | ||||||
|  |     E0c | ||||||
|  |     E0c4 | ||||||
|  |     E0c6 | ||||||
|  |     E0L | ||||||
|  |     E0_prefix | ||||||
|  |     E0_suffix | ||||||
|  |     E0_shape | ||||||
|  |     | ||||||
|  |     E1w | ||||||
|  |     E1W | ||||||
|  |     E1p | ||||||
|  |     E1c | ||||||
|  |     E1c4 | ||||||
|  |     E1c6 | ||||||
|  |     E1L | ||||||
|  |     E1_prefix | ||||||
|  |     E1_suffix | ||||||
|  |     E1_shape | ||||||
|  |     | ||||||
|     # Misc features at the end |     # Misc features at the end | ||||||
|     dist |     dist | ||||||
|     N0lv |     N0lv | ||||||
|  |  | ||||||
|  | @ -13,6 +13,7 @@ from ..tokens cimport TokenC | ||||||
| from ._state cimport State | from ._state cimport State | ||||||
| from ._state cimport get_s2, get_s1, get_s0, get_n0, get_n1, get_n2 | from ._state cimport get_s2, get_s1, get_s0, get_n0, get_n1, get_n2 | ||||||
| from ._state cimport get_p2, get_p1 | from ._state cimport get_p2, get_p1 | ||||||
|  | from ._state cimport get_e0, get_e1 | ||||||
| from ._state cimport has_head, get_left, get_right | from ._state cimport has_head, get_left, get_right | ||||||
| from ._state cimport count_left_kids, count_right_kids | from ._state cimport count_left_kids, count_right_kids | ||||||
| 
 | 
 | ||||||
|  | @ -69,6 +70,8 @@ cdef int fill_context(atom_t* context, State* state) except -1: | ||||||
|     fill_token(&context[P1w], get_p1(state)) |     fill_token(&context[P1w], get_p1(state)) | ||||||
|     fill_token(&context[P2w], get_p2(state)) |     fill_token(&context[P2w], get_p2(state)) | ||||||
| 
 | 
 | ||||||
|  |     fill_token(&context[E0w], get_e0(state)) | ||||||
|  |     fill_token(&context[E1w], get_e1(state)) | ||||||
|     if state.stack_len >= 1: |     if state.stack_len >= 1: | ||||||
|         context[dist] = state.stack[0] - state.i |         context[dist] = state.stack[0] - state.i | ||||||
|     else: |     else: | ||||||
|  | @ -135,6 +138,32 @@ ner = ( | ||||||
| 
 | 
 | ||||||
|     (P1c, N0c), |     (P1c, N0c), | ||||||
|     (N0c, N1c), |     (N0c, N1c), | ||||||
|  | 
 | ||||||
|  |     (E0w,), | ||||||
|  |     (E0c,), | ||||||
|  |     (E0p,), | ||||||
|  | 
 | ||||||
|  |     (E0w, N0w), | ||||||
|  |     (E0c, N0w), | ||||||
|  |     (E0p, N0w), | ||||||
|  | 
 | ||||||
|  |     (E0p, P1p, N0p), | ||||||
|  |     (E0c, P1c, N0c), | ||||||
|  | 
 | ||||||
|  |     (E0w, P1c), | ||||||
|  |     (E0p, P1p), | ||||||
|  |     (E0c, P1c), | ||||||
|  |     (E0p, E1p), | ||||||
|  |     (E0c, P1p), | ||||||
|  | 
 | ||||||
|  |     (E1w,), | ||||||
|  |     (E1c,), | ||||||
|  |     (E1p,), | ||||||
|  | 
 | ||||||
|  |     (E0w, E1w), | ||||||
|  |     (E0w, E1p,), | ||||||
|  |     (E0p, E1w,), | ||||||
|  |     (E0p, E1w), | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -54,6 +54,19 @@ cdef inline TokenC* get_p2(const State* s) nogil: | ||||||
|         return &s.sent[s.i-2] |         return &s.sent[s.i-2] | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | cdef inline TokenC* get_e0(const State* s) nogil: | ||||||
|  |     if s.ent.end != 0: | ||||||
|  |         return NULL | ||||||
|  |     else: | ||||||
|  |         return &s.sent[s.ent.start] | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | cdef inline TokenC* get_e1(const State* s) nogil: | ||||||
|  |     if s.ent.end != 0 or s.ent.start >= (s.i + 1): | ||||||
|  |         return NULL | ||||||
|  |     else: | ||||||
|  |         return &s.sent[s.ent.start + 1] | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| cdef inline TokenC* get_n2(const State* s) nogil: | cdef inline TokenC* get_n2(const State* s) nogil: | ||||||
|     if (s.i + 2) >= s.sent_len: |     if (s.i + 2) >= s.sent_len: | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user