mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 16:07:41 +03:00 
			
		
		
		
	Changed tokenizer to add infix when infix_start is offset
This commit is contained in:
		
							parent
							
								
									8be3392302
								
							
						
					
					
						commit
						33b0f86de3
					
@@ -241,11 +241,10 @@ cdef class Tokenizer:
                     for match in matches:
                         infix_start = match.start()
                         infix_end = match.end()
-                        if infix_start == start:
-                            continue
 
-                        span = string[start:infix_start]
-                        tokens.push_back(self.vocab.get(tokens.mem, span), False)
+                        if infix_start != start:
+                            span = string[start:infix_start]
+                            tokens.push_back(self.vocab.get(tokens.mem, span), False)
 
                         if infix_start != infix_end:
                             # If infix_start != infix_end, it means the infix
		Loading…
	
		Reference in New Issue
	
	Block a user