mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	* Improve interface for PointerHash
This commit is contained in:
		
							parent
							
								
									45865be37e
								
							
						
					
					
						commit
						f3393cf57c
					
				| 
						 | 
					@ -1,7 +1,7 @@
 | 
				
			||||||
from libc.stdint cimport uint64_t
 | 
					from libc.stdint cimport uint64_t
 | 
				
			||||||
 | 
					
 | 
				
			||||||
ctypedef uint64_t key_t
 | 
					ctypedef uint64_t key_t
 | 
				
			||||||
ctypedef size_t val_t
 | 
					ctypedef void* val_t
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
cdef struct Cell:
 | 
					cdef struct Cell:
 | 
				
			||||||
| 
						 | 
					@ -15,6 +15,6 @@ cdef class PointerHash:
 | 
				
			||||||
    cdef Cell* _last
 | 
					    cdef Cell* _last
 | 
				
			||||||
    cdef Cell* cells
 | 
					    cdef Cell* cells
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    cdef val_t lookup(self, key_t key)
 | 
					    cdef val_t get(self, key_t key)
 | 
				
			||||||
    cdef void insert(self, key_t key, val_t value) except *
 | 
					    cdef void set(self, key_t key, val_t value) except *
 | 
				
			||||||
    cdef void resize(self, size_t new_size) except *
 | 
					    cdef void resize(self, size_t new_size) except *
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -25,7 +25,7 @@ cdef class Lexicon:
 | 
				
			||||||
    cpdef readonly size_t size
 | 
					    cpdef readonly size_t size
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    cpdef Lexeme lookup(self, unicode string)
 | 
					    cpdef Lexeme lookup(self, unicode string)
 | 
				
			||||||
    cdef size_t get(self, String* s)
 | 
					    cdef LexemeC* get(self, String* s)
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    cdef PointerHash _dict
 | 
					    cdef PointerHash _dict
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -122,7 +122,6 @@ cdef class Language:
 | 
				
			||||||
        cdef int split
 | 
					        cdef int split
 | 
				
			||||||
        cdef int remaining = string.n
 | 
					        cdef int remaining = string.n
 | 
				
			||||||
        cdef String prefix
 | 
					        cdef String prefix
 | 
				
			||||||
        cdef Cell* tmp_cell
 | 
					 | 
				
			||||||
        while remaining >= 1:
 | 
					        while remaining >= 1:
 | 
				
			||||||
            split = self._split_one(string.chars, string.n)
 | 
					            split = self._split_one(string.chars, string.n)
 | 
				
			||||||
            remaining -= split
 | 
					            remaining -= split
 | 
				
			||||||
| 
						 | 
					@ -194,10 +193,11 @@ cdef class Lexicon:
 | 
				
			||||||
            self._dict.set(string.key, lexeme)
 | 
					            self._dict.set(string.key, lexeme)
 | 
				
			||||||
            self.size += 1
 | 
					            self.size += 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    cdef size_t get(self, String* string):
 | 
					    cdef LexemeC* get(self, String* string):
 | 
				
			||||||
        cdef LexemeC* lex_addr = <LexemeC*>self._dict.get(string.key)
 | 
					        cdef LexemeC* lexeme
 | 
				
			||||||
        if lex_addr != NULL:
 | 
					        lexeme = <LexemeC*>self._dict.get(string.key)
 | 
				
			||||||
            return <size_t>lex_addr
 | 
					        if lexeme != NULL:
 | 
				
			||||||
 | 
					            return lexeme
 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
        cdef unicode uni_string = string.chars[:string.n]
 | 
					        cdef unicode uni_string = string.chars[:string.n]
 | 
				
			||||||
        views = [string_view(uni_string, 0.0, 0, {}, {})
 | 
					        views = [string_view(uni_string, 0.0, 0, {}, {})
 | 
				
			||||||
| 
						 | 
					@ -207,10 +207,10 @@ cdef class Lexicon:
 | 
				
			||||||
            if flag_feature(uni_string, 0.0, {}, {}):
 | 
					            if flag_feature(uni_string, 0.0, {}, {}):
 | 
				
			||||||
                flags.add(i)
 | 
					                flags.add(i)
 | 
				
			||||||
 
 | 
					 
 | 
				
			||||||
        cdef LexemeC* lexeme = lexeme_init(uni_string, 0, 0, views, flags)
 | 
					        lexeme = lexeme_init(uni_string, 0, 0, views, flags)
 | 
				
			||||||
        self._dict.set(string.key, lexeme)
 | 
					        self._dict.set(string.key, lexeme)
 | 
				
			||||||
        self.size += 1
 | 
					        self.size += 1
 | 
				
			||||||
        return <size_t>lexeme
 | 
					        return lexeme
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    cpdef Lexeme lookup(self, unicode uni_string):
 | 
					    cpdef Lexeme lookup(self, unicode uni_string):
 | 
				
			||||||
        """Retrieve (or create, if not found) a Lexeme for a string, and return it.
 | 
					        """Retrieve (or create, if not found) a Lexeme for a string, and return it.
 | 
				
			||||||
| 
						 | 
					@ -223,8 +223,8 @@ cdef class Lexicon:
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
        cdef String string
 | 
					        cdef String string
 | 
				
			||||||
        string_from_unicode(&string, uni_string)
 | 
					        string_from_unicode(&string, uni_string)
 | 
				
			||||||
        cdef size_t lexeme = self.get(&string)
 | 
					        cdef LexemeC* lexeme = self.get(&string)
 | 
				
			||||||
        return Lexeme(lexeme)
 | 
					        return Lexeme(<size_t>lexeme)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
cdef void string_from_unicode(String* s, unicode uni):
 | 
					cdef void string_from_unicode(String* s, unicode uni):
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user