mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 09:57:26 +03:00 
			
		
		
		
	Reorganise Hebrew language data
This commit is contained in:
		
							parent
							
								
									7f05e977fa
								
							
						
					
					
						commit
						a77c9fc60d
					
				| 
						 | 
					@@ -1,10 +1,12 @@
 | 
				
			||||||
# encoding: utf8
 | 
					# coding: utf8
 | 
				
			||||||
from __future__ import unicode_literals, print_function
 | 
					from __future__ import unicode_literals
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from .stop_words import STOP_WORDS
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from ..language_data import BASE_EXCEPTIONS
 | 
				
			||||||
from ..language import Language
 | 
					from ..language import Language
 | 
				
			||||||
from ..attrs import LANG
 | 
					from ..attrs import LANG
 | 
				
			||||||
 | 
					from ..util import update_exc
 | 
				
			||||||
from .language_data import *
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class Hebrew(Language):
 | 
					class Hebrew(Language):
 | 
				
			||||||
| 
						 | 
					@@ -14,8 +16,8 @@ class Hebrew(Language):
 | 
				
			||||||
        lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
 | 
					        lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
 | 
				
			||||||
        lex_attr_getters[LANG] = lambda text: 'he'
 | 
					        lex_attr_getters[LANG] = lambda text: 'he'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        tokenizer_exceptions = TOKENIZER_EXCEPTIONS
 | 
					        tokenizer_exceptions = update_exc(BASE_EXCEPTIONS)
 | 
				
			||||||
        stop_words = STOP_WORDS
 | 
					        stop_words = set(STOP_WORDS)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
EXPORT = Hebrew
 | 
					__all__ = ['Hebrew']
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@@ -1,6 +1,7 @@
 | 
				
			||||||
# encoding: utf8
 | 
					# encoding: utf8
 | 
				
			||||||
from __future__ import unicode_literals
 | 
					from __future__ import unicode_literals
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
STOP_WORDS = set("""
 | 
					STOP_WORDS = set("""
 | 
				
			||||||
אני
 | 
					אני
 | 
				
			||||||
את
 | 
					את
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user