Mirror of https://github.com/explosion/spaCy.git
	add th test
This commit is contained in:
parent 39bb5690f0
commit 1abf472068
@@ -15,6 +15,7 @@ from ..fi import Finnish
 from ..bn import Bengali
 from ..he import Hebrew
 from ..nb import Norwegian
+from ..th import Thai
 
 
 from ..tokens import Doc
@@ -101,6 +102,11 @@ def he_tokenizer():
 def nb_tokenizer():
     return Norwegian.Defaults.create_tokenizer()
 
+@pytest.fixture
+def th_tokenizer():
+    pythainlp = pytest.importorskip("pythainlp")
+    return Thai.Defaults.create_tokenizer()
+
 @pytest.fixture
 def stringstore():
     return StringStore()
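Note on the fixture above: pytest.importorskip() imports the named module and, if the import fails, marks every test that requests the fixture as skipped rather than letting it error out at collection time. This keeps pythainlp an optional dependency of the test suite. A minimal, self-contained sketch of the pattern (the dependency and fixture names below are illustrative, not part of this commit):

import pytest

@pytest.fixture
def optional_dep():
    # Returns the imported module, or skips the requesting test
    # entirely when "some_optional_lib" cannot be imported.
    return pytest.importorskip("some_optional_lib")  # hypothetical dependency

def test_uses_optional_dep(optional_dep):
    # Reported as SKIPPED (not ERROR) when the dependency is missing.
    assert optional_dep is not None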
spacy/tests/th/test_tokenizer.py (new file, 13 lines)
@@ -0,0 +1,13 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import pytest
+
+TOKENIZER_TESTS = [
+        ("คุณรักผมไหม", ['คุณ', 'รัก', 'ผม', 'ไหม'])
+]
+
+@pytest.mark.parametrize('text,expected_tokens', TOKENIZER_TESTS)
+def test_thai_tokenizer(th_tokenizer, text, expected_tokens):
+    tokens = [token.text for token in th_tokenizer(text)]
+    assert tokens == expected_tokens
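For context, a minimal sketch of what this parametrized test exercises outside pytest, assuming pythainlp is installed and that this spaCy version exposes the Thai language class at spacy.th (the relative import "from ..th import Thai" in the fixtures file suggests this path):

from spacy.th import Thai

# Build the tokenizer the same way the th_tokenizer fixture does.
# Thai script is written without spaces between words, which is why
# the defaults guard on pythainlp (a Thai word segmenter) being importable.
tokenizer = Thai.Defaults.create_tokenizer()
doc = tokenizer("คุณรักผมไหม")
print([token.text for token in doc])
# Expected, per TOKENIZER_TESTS above: ['คุณ', 'รัก', 'ผม', 'ไหม']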