mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 16:07:41 +03:00 
			
		
		
		
	update th
This commit is contained in:
		
							parent
							
								
									44291f6697
								
							
						
					
					
						commit
						39bb5690f0
					
				|  | @ -25,6 +25,4 @@ class Thai(Language): | |||
| 			raise ImportError("The Thai tokenizer requires the PyThaiNLP library: " | ||||
| 								"https://github.com/wannaphongcom/pythainlp/") | ||||
| 		words = [x for x in list(word_tokenize(text,"newmm"))] | ||||
| 		return Doc(self.vocab, words=words, spaces=[False]*len(words)) | ||||
| 
 | ||||
| __all__ = ['Thai'] | ||||
| 		return Doc(self.vocab, words=words, spaces=[False]*len(words)) | ||||
|  | @ -42,39 +42,4 @@ TOKENIZER_EXCEPTIONS = { | |||
|     "ธ.ค.": [ | ||||
|         {ORTH: "ธ.ค.", LEMMA: "ธันวาคม"} | ||||
|     ] | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| # exceptions mapped to a single token containing only ORTH property | ||||
| # example: {"string": [{ORTH: "string"}]} | ||||
| # converted using strings_to_exc() util | ||||
| ''' | ||||
| ORTH_ONLY = [ | ||||
|     "a.", | ||||
|     "b.", | ||||
|     "c.", | ||||
|     "d.", | ||||
|     "e.", | ||||
|     "f.", | ||||
|     "g.", | ||||
|     "h.", | ||||
|     "i.", | ||||
|     "j.", | ||||
|     "k.", | ||||
|     "l.", | ||||
|     "m.", | ||||
|     "n.", | ||||
|     "o.", | ||||
|     "p.", | ||||
|     "q.", | ||||
|     "r.", | ||||
|     "s.", | ||||
|     "t.", | ||||
|     "u.", | ||||
|     "v.", | ||||
|     "w.", | ||||
|     "x.", | ||||
|     "y.", | ||||
|     "z." | ||||
| ] | ||||
| ''' | ||||
| } | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user