* Use Jieba tokenizer in Chinese class

This commit is contained in:
Matthew Honnibal 2016-04-24 19:11:38 +02:00
parent 9bfe20cac9
commit b6ccd8d76a

View File

@@ -1,5 +1,13 @@
from ..language import Language
from .jieba import JiebaTokenizer
class Chinese(Language):
    '''Language subclass for Chinese; its default tokenizer wraps Jieba.'''

    lang = u'zh'

    @classmethod
    def default_tokenizer(cls, package, vocab):
        '''Build the default tokenizer for this language.

        Delegates to ``JiebaTokenizer.from_package`` with the given
        ``package`` and ``vocab`` and returns whatever it produces.
        '''
        jieba_tokenizer = JiebaTokenizer.from_package(package, vocab)
        return jieba_tokenizer