mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			28 lines
		
	
	
		
			591 B
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			28 lines
		
	
	
		
			591 B
		
	
	
	
		
			Python
		
	
	
	
	
	
# coding: utf8
 | 
						||
from __future__ import unicode_literals
 | 
						||
 | 
						||
from ...symbols import ORTH, NORM
 | 
						||
 | 
						||
 | 
						||
# These exceptions are mostly for example purposes – hoping that Turkish
 | 
						||
# speakers can contribute in the future! Source of copy-pasted examples:
 | 
						||
# https://en.wiktionary.org/wiki/Category:Turkish_language
 | 
						||
 | 
						||
# Maps a surface string to the list of token attribute dicts that the
# string should be split into.
_exc = {}

# "sağol" is written as one word but is emitted as two tokens; the second
# token carries "olun" as its NORM.
_exc["sağol"] = [{ORTH: "sağ"}, {ORTH: "ol", NORM: "olun"}]

# Abbreviations kept as a single token, keyed by their ORTH, that also carry
# a spelled-out NORM.
_exc.update(
    (attrs[ORTH], [attrs])
    for attrs in ({ORTH: "A.B.D.", NORM: "Amerika Birleşik Devletleri"},)
)

# Abbreviations kept as a single token with no attributes besides ORTH.
_exc.update((text, [{ORTH: text}]) for text in ("Dr.",))


TOKENIZER_EXCEPTIONS = _exc
 |