diff --git a/spacy/zh/__init__.py b/spacy/zh/__init__.py index 2667d5442..2b4a696dc 100644 --- a/spacy/zh/__init__.py +++ b/spacy/zh/__init__.py @@ -23,10 +23,11 @@ class CharacterTokenizer(Tokenizer): def __call__(self, text): return self.tokens_from_list(list(text)) + class Chinese(Language): lang = u'zh' @classmethod def default_tokenizer(cls, package, vocab): '''Return Jieba-wrapper tokenizer.''' - return CharacterTokenizer.from_package(package, vocab) + return JiebaTokenizer.from_package(package, vocab)