mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 09:57:26 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			12 lines
		
	
	
		
			376 B
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			12 lines
		
	
	
		
			376 B
		
	
	
	
		
			Python
		
	
	
	
	
	
# coding: utf-8
 | 
						||
from __future__ import unicode_literals
 | 
						||
 | 
						||
 | 
						||
def test_issue3277(es_tokenizer):
 | 
						||
    """Test that hyphens are split correctly as prefixes."""
 | 
						||
    doc = es_tokenizer("—Yo me llamo... –murmuró el niño– Emilio Sánchez Pérez.")
 | 
						||
    assert len(doc) == 14
 | 
						||
    assert doc[0].text == "\u2014"
 | 
						||
    assert doc[5].text == "\u2013"
 | 
						||
    assert doc[9].text == "\u2013"
 |