mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	fix tokenizer_exceptions in thai
This commit is contained in:
		
							parent
							
								
									a2bf4cc7bf
								
							
						
					
					
						commit
						2ea27d07f4
					
				| 
						 | 
					@ -1,9 +1,7 @@
 | 
				
			||||||
# encoding: utf8
 | 
					# encoding: utf8
 | 
				
			||||||
from __future__ import unicode_literals
 | 
					from __future__ import unicode_literals
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from ..symbols import *
 | 
					from ...symbols import *
 | 
				
			||||||
from ..language_data import PRON_LEMMA
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
TOKENIZER_EXCEPTIONS = {
 | 
					TOKENIZER_EXCEPTIONS = {
 | 
				
			||||||
    "ม.ค.": [
 | 
					    "ม.ค.": [
 | 
				
			||||||
| 
						 | 
					@ -43,38 +41,3 @@ TOKENIZER_EXCEPTIONS = {
 | 
				
			||||||
        {ORTH: "ธ.ค.", LEMMA: "ธันวาคม"}
 | 
					        {ORTH: "ธ.ค.", LEMMA: "ธันวาคม"}
 | 
				
			||||||
    ]
 | 
					    ]
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# exceptions mapped to a single token containing only ORTH property
 | 
					 | 
				
			||||||
# example: {"string": [{ORTH: "string"}]}
 | 
					 | 
				
			||||||
# converted using strings_to_exc() util
 | 
					 | 
				
			||||||
'''
 | 
					 | 
				
			||||||
ORTH_ONLY = [
 | 
					 | 
				
			||||||
    "a.",
 | 
					 | 
				
			||||||
    "b.",
 | 
					 | 
				
			||||||
    "c.",
 | 
					 | 
				
			||||||
    "d.",
 | 
					 | 
				
			||||||
    "e.",
 | 
					 | 
				
			||||||
    "f.",
 | 
					 | 
				
			||||||
    "g.",
 | 
					 | 
				
			||||||
    "h.",
 | 
					 | 
				
			||||||
    "i.",
 | 
					 | 
				
			||||||
    "j.",
 | 
					 | 
				
			||||||
    "k.",
 | 
					 | 
				
			||||||
    "l.",
 | 
					 | 
				
			||||||
    "m.",
 | 
					 | 
				
			||||||
    "n.",
 | 
					 | 
				
			||||||
    "o.",
 | 
					 | 
				
			||||||
    "p.",
 | 
					 | 
				
			||||||
    "q.",
 | 
					 | 
				
			||||||
    "r.",
 | 
					 | 
				
			||||||
    "s.",
 | 
					 | 
				
			||||||
    "t.",
 | 
					 | 
				
			||||||
    "u.",
 | 
					 | 
				
			||||||
    "v.",
 | 
					 | 
				
			||||||
    "w.",
 | 
					 | 
				
			||||||
    "x.",
 | 
					 | 
				
			||||||
    "y.",
 | 
					 | 
				
			||||||
    "z."
 | 
					 | 
				
			||||||
]
 | 
					 | 
				
			||||||
'''
 | 
					 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user