mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	Don't split tokens with digits and "/" infixes (resolves #740)
This commit is contained in:
		
							parent
							
								
									e9e99a5670
								
							
						
					
					
						commit
						0894b8c0ef
					
				| 
						 | 
				
			
			@ -103,7 +103,7 @@ TOKENIZER_SUFFIXES = (
 | 
			
		|||
TOKENIZER_INFIXES = (
 | 
			
		||||
    LIST_ELLIPSES +
 | 
			
		||||
    [
 | 
			
		||||
-        r'(?<=[0-9])[+\-\*/^](?=[0-9])',
+        r'(?<=[0-9])[+\-\*^](?=[0-9-])',
 | 
			
		||||
        r'(?<=[{al}])\.(?=[{au}])'.format(al=ALPHA_LOWER, au=ALPHA_UPPER),
 | 
			
		||||
        r'(?<=[{a}]),(?=[{a}])'.format(a=ALPHA),
 | 
			
		||||
        r'(?<=[{a}])(?:{h})(?=[{a}])'.format(a=ALPHA, h=HYPHENS),
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue
	
	Block a user