mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-26 21:51:24 +03:00 
			
		
		
		
	* Have sentence boundary detection (SBD) return start/end indices
This commit is contained in:
		
							parent
							
								
									b183dff72d
								
							
						
					
					
						commit
						a27b23cc8f
					
				|  | @ -92,15 +92,18 @@ cdef class Tokens: | ||||||
|         cdef attr_t period = self.vocab.strings['.'] |         cdef attr_t period = self.vocab.strings['.'] | ||||||
|         cdef attr_t question = self.vocab.strings['?'] |         cdef attr_t question = self.vocab.strings['?'] | ||||||
|         cdef attr_t exclamation = self.vocab.strings['!'] |         cdef attr_t exclamation = self.vocab.strings['!'] | ||||||
|  |         spans = [] | ||||||
|  |         start = None | ||||||
|         for i in range(self.length): |         for i in range(self.length): | ||||||
|             sent.push_back(self.data[i].idx, &self.data[i]) |             if start is None: | ||||||
|  |                 start = i | ||||||
|             if self.data[i].lex.sic == period or self.data[i].lex.sic == exclamation or \ |             if self.data[i].lex.sic == period or self.data[i].lex.sic == exclamation or \ | ||||||
|               self.data[i].lex.sic == question: |               self.data[i].lex.sic == question: | ||||||
|                 sentences.append(sent) |                 spans.append((start, i+1)) | ||||||
|                 sent = Tokens(self.vocab, self._string[self.data[i].idx:]) |                 start = None | ||||||
|         if sent.length: |         if start is not None: | ||||||
|             sentences.append(sent) |             spans.append((start, self.length)) | ||||||
|         return sentences |         return spans | ||||||
| 
 | 
 | ||||||
|     def __getitem__(self, i): |     def __getitem__(self, i): | ||||||
|         """Retrieve a token. |         """Retrieve a token. | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user