mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-25 13:11:03 +03:00 
			
		
		
		
	* luganda language extension * __init__.py changes * New enhancements * Lexical attribute changed * punctuation and sentence additions * Remove comment header * Fix typos, reformat * reformatted version * Add tokenizer test * Remove contractions from stop words * Format * Add Luganda to website Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
		
			
				
	
	
		
			16 lines
		
	
	
		
			449 B
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			16 lines
		
	
	
		
			449 B
		
	
	
	
		
			Python
		
	
	
	
	
	
| import pytest
 | ||
| 
 | ||
# Each entry pairs an input text with the token texts expected from it.
LG_BASIC_TOKENIZATION_TESTS = [
    ("Abooluganda ab’emmamba ababiri", ["Abooluganda", "ab’emmamba", "ababiri"]),
]
 | ||
| 
 | ||
| 
 | ||
@pytest.mark.parametrize("text,expected_tokens", LG_BASIC_TOKENIZATION_TESTS)
def test_lg_tokenizer_basic(lg_tokenizer, text, expected_tokens):
    """Check that ``lg_tokenizer`` splits ``text`` into ``expected_tokens``.

    Tokens flagged as whitespace (``is_space``) are dropped before the
    comparison, so only the texts of the remaining tokens are checked.
    """
    doc = lg_tokenizer(text)
    # Filter first, then project to text, to keep the two steps separate.
    non_space = (tok for tok in doc if not tok.is_space)
    observed = [tok.text for tok in non_space]
    assert expected_tokens == observed
 |