mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 07:57:35 +03:00 
			
		
		
		
	* Adding Support for Yoruba * test text * Updated test string. * Fixing encoding declaration. * Adding encoding to stop_words.py * Added contributor agreement and removed iranlowo. * Added removed test files and removed iranlowo to keep project bare. * Returned CONTRIBUTING.md to default state. * Added deleted conftest entries * Tidy up and auto-format * Revert CONTRIBUTING.md Co-authored-by: Ines Montani <ines@ines.io>
		
			
				
	
	
		
			13 lines
		
	
	
		
			664 B
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			13 lines
		
	
	
		
			664 B
		
	
	
	
		
			Python
		
	
	
	
	
	
# coding: utf8
from __future__ import unicode_literals


# Yoruba stop words, written as one whitespace-separated list and split into
# a set at import time.
# Source: https://raw.githubusercontent.com/dohliam/more-stoplists/master/yo/yo.txt
#
# The adjacent string literals below are concatenated by the parser, so every
# line except the last must end with a space; otherwise the last word of one
# line would fuse with the first word of the next.
#
# The list also contains a few bare combining diacritics (the tokens between
# "ǹ" and "ṣ" on the last line). They are kept exactly as they appear in the
# list above; NOTE(review): confirm against the upstream source whether they
# are intentional entries.
STOP_WORDS = set(
    "a an b bá bí bẹ̀rẹ̀ d e f fún fẹ́ g gbogbo i inú j jù jẹ jẹ́ k kan kì kí kò "
    "l láti lè lọ m mi mo máa mọ̀ n ni náà ní nígbà nítorí nǹkan o p padà pé "
    "púpọ̀ pẹ̀lú r rẹ̀ s sì sí sínú t ti tí u w wà wá wọn wọ́n y yìí à àti àwọn á "
    "è é ì í ò òun ó ù ú ń ńlá ǹ ̀ ́ ̣ ṣ ṣe ṣé ṣùgbọ́n ẹ ẹmọ́ ọ ọjọ́ ọ̀pọ̀lọpọ̀".split()
)