mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-30 07:27:28 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			21 lines
		
	
	
		
			621 B
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			21 lines
		
	
	
		
			621 B
		
	
	
	
		
			Python
		
	
	
	
	
	
| import pytest
 | |
| 
 | |
| 
 | |
@pytest.mark.parametrize(
    "text",
    ["ق.م", "إلخ", "ص.ب", "ت."],
)
def test_ar_tokenizer_handles_abbr(ar_tokenizer, text):
    """Check that each parametrized abbreviation is kept as a single token.

    Args:
        ar_tokenizer: Tokenizer fixture under test (presumably supplied by
            the suite's conftest -- not visible in this file).
        text: One abbreviation string from the parametrize list.
    """
    doc = ar_tokenizer(text)
    token_count = len(doc)
    # The abbreviation must not be split on its internal/trailing period.
    assert token_count == 1
 | |
| 
 | |
| 
 | |
def test_ar_tokenizer_handles_exc_in_text(ar_tokenizer):
    """Check that an abbreviation embedded in a sentence stays one token.

    Args:
        ar_tokenizer: Tokenizer fixture under test (presumably supplied by
            the suite's conftest -- not visible in this file).
    """
    sentence = "تعود الكتابة الهيروغليفية إلى سنة 3200 ق.م"
    doc = ar_tokenizer(sentence)
    # Seven tokens are expected; the one at index 6 is the unsplit abbreviation.
    assert len(doc) == 7
    final_token = doc[6]
    assert final_token.text == "ق.م"
 | |
| 
 | |
| 
 | |
def test_ar_tokenizer_handles_exc_in_text_2(ar_tokenizer):
    """Check the token count for a sentence ending in a number fused to a unit.

    Args:
        ar_tokenizer: Tokenizer fixture under test (presumably supplied by
            the suite's conftest -- not visible in this file).
    """
    # NOTE: the trailing space inside the literal is part of the test input.
    sentence = "يبلغ طول مضيق طارق 14كم "
    doc = ar_tokenizer(sentence)
    token_count = len(doc)
    assert token_count == 6
 |