mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 09:57:26 +03:00 
			
		
		
		
	* Use tokens from Jieba library
This commit is contained in:
		
							parent
							
								
									3186379253
								
							
						
					
					
						commit
						1ede19c75a
					
				| 
						 | 
					@ -23,10 +23,11 @@ class CharacterTokenizer(Tokenizer):
 | 
				
			||||||
    def __call__(self, text):
 | 
					    def __call__(self, text):
 | 
				
			||||||
        return self.tokens_from_list(list(text))
 | 
					        return self.tokens_from_list(list(text))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class Chinese(Language):
 | 
					class Chinese(Language):
 | 
				
			||||||
    lang = u'zh'
 | 
					    lang = u'zh'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @classmethod
 | 
					    @classmethod
 | 
				
			||||||
    def default_tokenizer(cls, package, vocab):
 | 
					    def default_tokenizer(cls, package, vocab):
 | 
				
			||||||
        '''Return Jieba-wrapper tokenizer.'''
 | 
					        '''Return Jieba-wrapper tokenizer.'''
 | 
				
			||||||
        return CharacterTokenizer.from_package(package, vocab)
 | 
					        return JiebaTokenizer.from_package(package, vocab)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user