mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-30 23:47:31 +03:00 
			
		
		
		
	* Add base classes for more languages * Add test for language class initialization. Make sure each language can be initialized – otherwise, it's difficult to catch serious errors in the test suite, because languages are lazy-loaded
		
			
				
	
	
		
			62 lines
		
	
	
		
			348 B
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			62 lines
		
	
	
		
			348 B
		
	
	
	
		
			Python
		
	
	
	
	
	
| # coding: utf8
 | |
| from __future__ import unicode_literals
 | |
| 
 | |
| 
 | |
| # Source: https://github.com/stopwords-iso/stopwords-af
 | |
| 
 | |
# Afrikaans stop words, one per line. The literal is split on whitespace, so
# the surrounding newlines never end up in the set. Both "'n" (ASCII
# apostrophe) and "ʼn" (typographic apostrophe-like character) are listed as
# separate entries; keep both spellings when editing the list.
STOP_WORDS = set(
    """
'n
aan
af
al
as
baie
by
daar
dag
dat
die
dit
een
ek
en
gaan
gesê
haar
het
hom
hulle
hy
in
is
jou
jy
kan
kom
ma
maar
met
my
na
nie
om
ons
op
saam
sal
se
sien
so
sy
te
toe
uit
van
vir
was
wat
ʼn
""".split()
)