# coding: utf8
from __future__ import unicode_literals

import pytest

# Each case pairs a Thai input with its expected segmentation. Thai is written
# without spaces between words, so the tokenizer must find word boundaries:
# "คุณรักผมไหม" ("Do you love me?") -> you / love / me / question particle.
TOKENIZER_TESTS = [
    ("คุณรักผมไหม", ['คุณ', 'รัก', 'ผม', 'ไหม'])
]


@pytest.mark.parametrize('text,expected_tokens', TOKENIZER_TESTS)
def test_thai_tokenizer(th_tokenizer, text, expected_tokens):
    tokens = [token.text for token in th_tokenizer(text)]
    assert tokens == expected_tokens
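

# The `th_tokenizer` fixture used above is normally supplied by the test
# suite's shared conftest.py rather than this file. The sketch below is an
# assumption for illustration, not spaCy's exact fixture: it targets the
# spaCy v2-era API (`Defaults.create_tokenizer()`) and skips the test cleanly
# when pythainlp, the library backing the Thai tokenizer, is not installed.
@pytest.fixture
def th_tokenizer():
    pytest.importorskip('pythainlp')  # Thai segmentation requires pythainlp
    from spacy.util import get_lang_class
    return get_lang_class('th').Defaults.create_tokenizer()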