mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-25 13:11:03 +03:00 
			
		
		
		
	* `strings`: More robust type checking of keys/IDs, coerce `int`-like types to `hash_t` * Preserve existing public API behaviour * Fix return type * Replace `bool` with `bint`, rename to `_try_coerce_to_hash`, replace `id` with `hash` * Avoid unnecessary re-encoding and re-calculation of strings and hashes respectively * Rename variables named `hash` * Add comment on early return
		
			
				
	
	
		
			30 lines
		
	
	
		
			727 B
		
	
	
	
		
			Cython
		
	
	
	
	
	
			
		
		
	
	
			30 lines
		
	
	
		
			727 B
		
	
	
	
		
			Cython
		
	
	
	
	
	
| from libc.stdint cimport int64_t
 | |
| from libcpp.vector cimport vector
 | |
| from libcpp.set cimport set
 | |
| from cymem.cymem cimport Pool
 | |
| from preshed.maps cimport PreshMap
 | |
| from murmurhash.mrmr cimport hash64
 | |
| 
 | |
| from .typedefs cimport attr_t, hash_t
 | |
| 
 | |
| 
 | |
| cpdef hash_t hash_string(str string) except 0  # Hash a Python str to a hash_t; callable from Python and Cython (cpdef). `except 0`: a return value of 0 signals a raised exception.
 | |
| cdef hash_t hash_utf8(char* utf8_string, int length) nogil  # Hash `length` bytes starting at `utf8_string`; declared nogil, so it may be called without holding the GIL.
 | |
| 
 | |
| cdef str decode_Utf8Str(const Utf8Str* string)  # Convert a Utf8Str (union declared further down in this file) into a Python str.
 | |
| 
 | |
| 
 | |
| ctypedef union Utf8Str:  # Union whose storage is either an 8-byte inline buffer or a pointer. NOTE(review): how the active member is chosen is not visible in this file — confirm in the implementation.
 | |
|     unsigned char[8] s  # fixed 8-byte buffer; presumably holds short strings inline — TODO confirm
 | |
|     unsigned char* p  # pointer member sharing the same storage; presumably refers to separately allocated bytes — TODO confirm
 | |
| 
 | |
| 
 | |
| cdef class StringStore:  # C-level declaration of the StringStore extension type: its attributes and cdef methods, for cimport by other modules.
 | |
|     cdef Pool mem  # cymem Pool instance; presumably owns the memory for interned strings — TODO confirm
 | |
| 
 | |
|     cdef vector[hash_t] keys  # C++ vector of hash_t values; presumably the hashes of stored strings — verify in the implementation
 | |
|     cdef public PreshMap _map  # preshed PreshMap; `public` makes this attribute accessible from Python
 | |
| 
 | |
|     cdef const Utf8Str* intern_unicode(self, str py_string)  # Takes a Python str and returns a pointer to a const Utf8Str.
 | |
|     cdef const Utf8Str* _intern_utf8(self, char* utf8_string, int length, hash_t* precalculated_hash)  # Takes a byte buffer and its length; `precalculated_hash` presumably lets callers pass an already-computed hash to avoid recalculation — TODO confirm NULL handling
 |