mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 16:07:41 +03:00 
			
		
		
		
	* Fix zero-length spans
This commit is contained in:
		
							parent
							
								
									888c05a7fa
								
							
						
					
					
						commit
						4b3c96d76d
					
				|  | @ -23,9 +23,12 @@ cdef class Span: | ||||||
| 
 | 
 | ||||||
|         self.doc = tokens |         self.doc = tokens | ||||||
|         self.start = start |         self.start = start | ||||||
|         self.start_char = self.doc[start].idx |         self.start_char = self.doc[start].idx if start < self.doc.length else 0 | ||||||
|         self.end = end |         self.end = end | ||||||
|         self.end_char = self.doc[end - 1].idx + len(self.doc[end - 1]) |         if end >= 1: | ||||||
|  |             self.end_char = self.doc[end - 1].idx + len(self.doc[end - 1]) | ||||||
|  |         else: | ||||||
|  |             self.end_char = 0 | ||||||
|         self.label = label |         self.label = label | ||||||
|         self._vector = vector |         self._vector = vector | ||||||
|         self._vector_norm = vector_norm |         self._vector_norm = vector_norm | ||||||
|  | @ -81,7 +84,7 @@ cdef class Span: | ||||||
|         return numpy.dot(self.vector, other.vector) / (self.vector_norm * other.vector_norm) |         return numpy.dot(self.vector, other.vector) / (self.vector_norm * other.vector_norm) | ||||||
| 
 | 
 | ||||||
|     cpdef int _recalculate_indices(self) except -1: |     cpdef int _recalculate_indices(self) except -1: | ||||||
|         if self.end >= self.doc.length \ |         if self.end > self.doc.length \ | ||||||
|         or self.doc.c[self.start].idx != self.start_char \ |         or self.doc.c[self.start].idx != self.start_char \ | ||||||
|         or (self.doc.c[self.end-1].idx + self.doc.c[self.end-1].lex.length) != self.end_char: |         or (self.doc.c[self.end-1].idx + self.doc.c[self.end-1].lex.length) != self.end_char: | ||||||
|             start = token_by_start(self.doc.c, self.doc.length, self.start_char) |             start = token_by_start(self.doc.c, self.doc.length, self.start_char) | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user