mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	* Fix EMPTY_TOKEN
This commit is contained in:
		
							parent
							
								
									3819a88e1b
								
							
						
					
					
						commit
						9f17467c2e
					
				| 
						 | 
					@ -16,9 +16,6 @@ cdef struct TokenC:
 | 
				
			||||||
    int sense
 | 
					    int sense
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
cdef TokenC EMPTY_TOKEN = TokenC(&EMPTY_LEXEME, 0, 0, 0)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
cdef class Tokens:
 | 
					cdef class Tokens:
 | 
				
			||||||
    cdef Pool mem
 | 
					    cdef Pool mem
 | 
				
			||||||
    cdef StringStore _string_store
 | 
					    cdef StringStore _string_store
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -43,7 +43,7 @@ cdef class Tokens:
 | 
				
			||||||
        data_start = <TokenC*>self.mem.alloc(size + (PADDING*2), sizeof(TokenC))
 | 
					        data_start = <TokenC*>self.mem.alloc(size + (PADDING*2), sizeof(TokenC))
 | 
				
			||||||
        cdef int i
 | 
					        cdef int i
 | 
				
			||||||
        for i in range(size + (PADDING*2)):
 | 
					        for i in range(size + (PADDING*2)):
 | 
				
			||||||
            data_start[i] = EMPTY_TOKEN
 | 
					            data_start[i].lex = &EMPTY_LEXEME
 | 
				
			||||||
        self.data = data_start + PADDING
 | 
					        self.data = data_start + PADDING
 | 
				
			||||||
        self.max_length = size
 | 
					        self.max_length = size
 | 
				
			||||||
        self.length = 0
 | 
					        self.length = 0
 | 
				
			||||||
| 
						 | 
					@ -86,10 +86,7 @@ cdef class Tokens:
 | 
				
			||||||
        return idx
 | 
					        return idx
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    cpdef int set_tag(self, int i, int tag_type, int tag) except -1:
 | 
					    cpdef int set_tag(self, int i, int tag_type, int tag) except -1:
 | 
				
			||||||
        if tag_type == POS:
 | 
					        self.data[i].pos = tag
 | 
				
			||||||
            self.pos[i] = tag
 | 
					 | 
				
			||||||
        elif tag_type == ENTITY:
 | 
					 | 
				
			||||||
            self.ner[i] = tag
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @cython.boundscheck(False)
 | 
					    @cython.boundscheck(False)
 | 
				
			||||||
    cpdef np.ndarray[long, ndim=2] get_array(self, list attr_ids):
 | 
					    cpdef np.ndarray[long, ndim=2] get_array(self, list attr_ids):
 | 
				
			||||||
| 
						 | 
					@ -116,12 +113,17 @@ cdef class Tokens:
 | 
				
			||||||
    def _realloc(self, new_size):
 | 
					    def _realloc(self, new_size):
 | 
				
			||||||
        self.max_length = new_size
 | 
					        self.max_length = new_size
 | 
				
			||||||
        n = new_size + (PADDING * 2)
 | 
					        n = new_size + (PADDING * 2)
 | 
				
			||||||
 | 
					        # What we're storing is a "padded" array. We've jumped forward PADDING
 | 
				
			||||||
 | 
					        # places, and are storing the pointer to that. This way, we can access
 | 
				
			||||||
 | 
					        # words out-of-bounds, and get out-of-bounds markers.
 | 
				
			||||||
 | 
					        # Now that we want to realloc, we need the address of the true start,
 | 
				
			||||||
 | 
					        # so we jump the pointer back PADDING places.
 | 
				
			||||||
        cdef TokenC* data_start = self.data - PADDING
 | 
					        cdef TokenC* data_start = self.data - PADDING
 | 
				
			||||||
        data_start = <TokenC*>self.mem.realloc(data_start, n * sizeof(TokenC))
 | 
					        data_start = <TokenC*>self.mem.realloc(data_start, n * sizeof(TokenC))
 | 
				
			||||||
        self.data = data_start + PADDING
 | 
					        self.data = data_start + PADDING
 | 
				
			||||||
        cdef int i
 | 
					        cdef int i
 | 
				
			||||||
        for i in range(self.length, self.max_length + PADDING):
 | 
					        for i in range(self.length, self.max_length + PADDING):
 | 
				
			||||||
            self.data[i] = EMPTY_TOKEN
 | 
					            self.data[i].lex = &EMPTY_LEXEME
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@cython.freelist(64)
 | 
					@cython.freelist(64)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user