mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	* Add support for sentence breaks in stateclass
This commit is contained in:
		
							parent
							
								
									b7b18c279d
								
							
						
					
					
						commit
						495f528709
					
				| 
						 | 
					@ -16,10 +16,10 @@ cdef class StateClass:
 | 
				
			||||||
    cdef Entity* _ents
 | 
					    cdef Entity* _ents
 | 
				
			||||||
    cdef TokenC _empty_token
 | 
					    cdef TokenC _empty_token
 | 
				
			||||||
    cdef int length
 | 
					    cdef int length
 | 
				
			||||||
    cdef bint at_sent_end
 | 
					 | 
				
			||||||
    cdef int _s_i
 | 
					    cdef int _s_i
 | 
				
			||||||
    cdef int _b_i
 | 
					    cdef int _b_i
 | 
				
			||||||
    cdef int _e_i
 | 
					    cdef int _e_i
 | 
				
			||||||
 | 
					    cdef int _break
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @staticmethod
 | 
					    @staticmethod
 | 
				
			||||||
    cdef inline StateClass init(const TokenC* sent, int length):
 | 
					    cdef inline StateClass init(const TokenC* sent, int length):
 | 
				
			||||||
| 
						 | 
					@ -93,6 +93,6 @@ cdef class StateClass:
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    cdef void set_ent_tag(self, int i, int ent_iob, int ent_type) nogil
 | 
					    cdef void set_ent_tag(self, int i, int ent_iob, int ent_type) nogil
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    cdef void set_sent_end(self, int i) nogil
 | 
					    cdef void set_break(self, int i) nogil
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    cdef void clone(self, StateClass src) nogil
 | 
					    cdef void clone(self, StateClass src) nogil
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -14,6 +14,7 @@ cdef class StateClass:
 | 
				
			||||||
        self._ents = <Entity*>mem.alloc(length, sizeof(Entity))
 | 
					        self._ents = <Entity*>mem.alloc(length, sizeof(Entity))
 | 
				
			||||||
        self.mem = mem
 | 
					        self.mem = mem
 | 
				
			||||||
        self.length = length
 | 
					        self.length = length
 | 
				
			||||||
 | 
					        self._break = length
 | 
				
			||||||
        self._s_i = 0
 | 
					        self._s_i = 0
 | 
				
			||||||
        self._b_i = 0
 | 
					        self._b_i = 0
 | 
				
			||||||
        self._e_i = 0
 | 
					        self._e_i = 0
 | 
				
			||||||
| 
						 | 
					@ -104,10 +105,10 @@ cdef class StateClass:
 | 
				
			||||||
        return self._s_i <= 0
 | 
					        return self._s_i <= 0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    cdef bint eol(self) nogil:
 | 
					    cdef bint eol(self) nogil:
 | 
				
			||||||
        return self._b_i >= self.length or self.at_sent_end
 | 
					        return self._b_i >= self._break
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    cdef bint is_final(self) nogil:
 | 
					    cdef bint is_final(self) nogil:
 | 
				
			||||||
        return self.stack_depth() <= 1 and self.buffer_length() == 0
 | 
					        return self.stack_depth() <= 1 and self._b_i >= self.length
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    cdef bint has_head(self, int i) nogil:
 | 
					    cdef bint has_head(self, int i) nogil:
 | 
				
			||||||
        return self.safe_get(i).head != 0
 | 
					        return self.safe_get(i).head != 0
 | 
				
			||||||
| 
						 | 
					@ -130,12 +131,14 @@ cdef class StateClass:
 | 
				
			||||||
        return self._s_i
 | 
					        return self._s_i
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    cdef int buffer_length(self) nogil:
 | 
					    cdef int buffer_length(self) nogil:
 | 
				
			||||||
        return self.length - self._b_i
 | 
					        return self._break - self._b_i
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    cdef void push(self) nogil:
 | 
					    cdef void push(self) nogil:
 | 
				
			||||||
        self._stack[self._s_i] = self.B(0)
 | 
					        self._stack[self._s_i] = self.B(0)
 | 
				
			||||||
        self._s_i += 1
 | 
					        self._s_i += 1
 | 
				
			||||||
        self._b_i += 1
 | 
					        self._b_i += 1
 | 
				
			||||||
 | 
					        if self._b_i >= self._break:
 | 
				
			||||||
 | 
					            self._break = self.length
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    cdef void pop(self) nogil:
 | 
					    cdef void pop(self) nogil:
 | 
				
			||||||
        self._s_i -= 1
 | 
					        self._s_i -= 1
 | 
				
			||||||
| 
						 | 
					@ -182,9 +185,10 @@ cdef class StateClass:
 | 
				
			||||||
            self._sent[i].ent_iob = ent_iob
 | 
					            self._sent[i].ent_iob = ent_iob
 | 
				
			||||||
            self._sent[i].ent_type = ent_type
 | 
					            self._sent[i].ent_type = ent_type
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    cdef void set_sent_end(self, int i) nogil:
 | 
					    cdef void set_break(self, int i) nogil:
 | 
				
			||||||
        if 0 <= i < self.length:
 | 
					        if 0 <= i < self.length:
 | 
				
			||||||
            self._sent[i].sent_end = True
 | 
					            self._sent[i].sent_end = True
 | 
				
			||||||
 | 
					            self._break = i
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    cdef void clone(self, StateClass src) nogil:
 | 
					    cdef void clone(self, StateClass src) nogil:
 | 
				
			||||||
        memcpy(self._sent, src._sent, self.length * sizeof(TokenC))
 | 
					        memcpy(self._sent, src._sent, self.length * sizeof(TokenC))
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user