mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	added prefix & suffix rules
This commit is contained in:
		
							parent
							
								
									ba922e30e8
								
							
						
					
					
						commit
						0e590c711f
					
				| 
						 | 
					@ -2,6 +2,7 @@
 | 
				
			||||||
from __future__ import unicode_literals
 | 
					from __future__ import unicode_literals
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from .stop_words import STOP_WORDS
 | 
					from .stop_words import STOP_WORDS
 | 
				
			||||||
 | 
					from .punctuation import TOKENIZER_SUFFIXES, TOKENIZER_PREFIXES
 | 
				
			||||||
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
 | 
					from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
 | 
				
			||||||
from .norm_exceptions import NORM_EXCEPTIONS
 | 
					from .norm_exceptions import NORM_EXCEPTIONS
 | 
				
			||||||
from .lex_attrs import LEX_ATTRS
 | 
					from .lex_attrs import LEX_ATTRS
 | 
				
			||||||
| 
						 | 
					@ -20,6 +21,8 @@ class IndonesianDefaults(Language.Defaults):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
 | 
					    tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
 | 
				
			||||||
    stop_words = set(STOP_WORDS)
 | 
					    stop_words = set(STOP_WORDS)
 | 
				
			||||||
 | 
					    prefixes = tuple(TOKENIZER_PREFIXES)
 | 
				
			||||||
 | 
					    suffixes = tuple(TOKENIZER_SUFFIXES)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class Indonesian(Language):
 | 
					class Indonesian(Language):
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user