* Use tokens from Jieba library

This commit is contained in:
Matthew Honnibal 2016-04-28 14:32:27 +02:00
parent 3186379253
commit 1ede19c75a

View File

@@ -23,10 +23,11 @@ class CharacterTokenizer(Tokenizer):
def __call__(self, text): def __call__(self, text):
return self.tokens_from_list(list(text)) return self.tokens_from_list(list(text))
class Chinese(Language): class Chinese(Language):
lang = u'zh' lang = u'zh'
@classmethod @classmethod
def default_tokenizer(cls, package, vocab): def default_tokenizer(cls, package, vocab):
'''Return Jieba-wrapper tokenizer.''' '''Return Jieba-wrapper tokenizer.'''
return CharacterTokenizer.from_package(package, vocab) return JiebaTokenizer.from_package(package, vocab)