mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			21 lines
		
	
	
		
			598 B
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			21 lines
		
	
	
		
			598 B
		
	
	
	
		
			Python
		
	
	
	
	
	
import pytest
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize(
    "text",
    ["auf'm", "du's", "über'm", "wir's"],
)
def test_de_tokenizer_splits_contractions(de_tokenizer, text):
    """Each apostrophe contraction sample must come back as exactly two tokens."""
    assert len(de_tokenizer(text)) == 2
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize(
    "text",
    ["z.B.", "d.h.", "Jan.", "Dez.", "Chr."],
)
def test_de_tokenizer_handles_abbr(de_tokenizer, text):
    """Each abbreviation sample must stay intact as a single token."""
    assert len(de_tokenizer(text)) == 1
 | 
						|
 | 
						|
 | 
						|
def test_de_tokenizer_handles_exc_in_text(de_tokenizer):
    """Check that "z.Zt." survives as one token inside a full sentence.

    The sentence is expected to yield six tokens in total, with the
    abbreviation at index 2.
    """
    doc = de_tokenizer("Ich bin z.Zt. im Urlaub.")
    expected_count = 6
    abbreviation = "z.Zt."
    assert len(doc) == expected_count
    assert doc[2].text == abbreviation
 |