mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	* Add encode_sense_strs function
This commit is contained in:
		
							parent
							
								
									1be5ab200f
								
							
						
					
					
						commit
						4a60b68a24
					
				| 
						 | 
					@ -59,5 +59,11 @@ cpdef enum:
 | 
				
			||||||
    N_SENSES
 | 
					    N_SENSES
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
cdef flags_t[<int>parts_of_speech.N_UNIV_TAGS] POS_SENSES
 | 
					cdef flags_t[<int>parts_of_speech.N_UNIV_TAGS] POS_SENSES
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Can this not be defined at compile time? For now the error sentinel (1 << 54 == 18014398509481984) has to be hard-coded.
 | 
				
			||||||
 | 
					# DEF INVALID_SENSE = 1 << 54
 | 
				
			||||||
 | 
					cdef flags_t encode_sense_strs(sense_names) except 18014398509481984
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -86,3 +86,17 @@ STRINGS = (
 | 
				
			||||||
    'V_stative',
 | 
					    'V_stative',
 | 
				
			||||||
    'V_weather'
 | 
					    'V_weather'
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Reverse lookup: sense name -> its index in STRINGS.
					IDS = {sense_str: i for i, sense_str in enumerate(STRINGS)}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					cdef flags_t encode_sense_strs(sense_names) except 18014398509481984:
					    """Pack an iterable of sense names into a single bit field.

					    For every name, the bit whose position is the name's index in IDS is
					    set in the result. Names containing a '.' (e.g. 'verb.weather') are
					    first rewritten as '<X>_<suffix>', where <X> is the upper-cased first
					    character and <suffix> is the text after the first '.', giving
					    'V_weather'. The name 'N_Tops' is skipped and sets no bit.

					    Returns the combined bit field (0 for an empty iterable).

					    Raises KeyError if a (normalised) name is not present in IDS. The
					    value 1 << 54 (18014398509481984) is reserved as the error return
					    that propagates the exception to C-level callers.
					    """
					    cdef flags_t sense_bits = 0
					    cdef flags_t sense_id = 0
					    # Shift a flags_t-typed one. A bare literal `1` is a C int, so
					    # `1 << sense_id` would be evaluated at int width and is undefined
					    # once sense_id reaches the width of int.
					    # NOTE(review): assumes flags_t is a 64-bit type, as the 1 << 54
					    # sentinel suggests -- confirm against its typedef.
					    cdef flags_t one = 1
					    for sense_str in sense_names:
					        if '.' in sense_str:
					            sense_str = sense_str[0].upper() + '_' + sense_str.split('.')[1]
					        if sense_str != 'N_Tops':
					            sense_id = IDS[sense_str]
					            sense_bits |= one << sense_id
					    return sense_bits
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user