mirror of https://github.com/explosion/spaCy.git (synced 2025-11-04 01:48:04 +03:00)
import pytest


# Each test case is a pair of (input text, expected token texts).
DOT_TESTS = [
    ("tel.", ["tel", "."]),
    ("0 zł 99 gr", ["0", "zł", "99", "gr"]),
]

HYPHEN_TESTS = [
    ("cztero-", ["cztero-"]),
    ("jedno-", ["jedno-"]),
    ("dwu-", ["dwu-"]),
    ("trzy-", ["trzy-"]),
]

TESTCASES = DOT_TESTS + HYPHEN_TESTS


@pytest.mark.parametrize("text,expected_tokens", TESTCASES)
def test_tokenizer_handles_testcases(pl_tokenizer, text, expected_tokens):
    # Tokenize with the Polish tokenizer fixture and compare the non-space
    # token texts against the expected segmentation.
    tokens = pl_tokenizer(text)
    token_list = [token.text for token in tokens if not token.is_space]
    assert expected_tokens == token_list
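# Note: pl_tokenizer is a pytest fixture supplied by the test suite's
# conftest.py rather than by this file. A minimal sketch of such a fixture,
# assuming spaCy's blank-language API, could look like this (hypothetical,
# for illustration only):
#
#     import spacy
#
#     @pytest.fixture(scope="session")
#     def pl_tokenizer():
#         return spacy.blank("pl").tokenizer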