From 1ede19c75a5728dcbdc117acf20bb6e13d0a1773 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Thu, 28 Apr 2016 14:32:27 +0200
Subject: [PATCH] * Use tokens from Jieba library

---
 spacy/zh/__init__.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/spacy/zh/__init__.py b/spacy/zh/__init__.py
index 2667d5442..2b4a696dc 100644
--- a/spacy/zh/__init__.py
+++ b/spacy/zh/__init__.py
@@ -23,10 +23,11 @@ class CharacterTokenizer(Tokenizer):
     def __call__(self, text):
         return self.tokens_from_list(list(text))
 
+
 class Chinese(Language):
     lang = u'zh'
 
     @classmethod
     def default_tokenizer(cls, package, vocab):
         '''Return Jieba-wrapper tokenizer.'''
-        return CharacterTokenizer.from_package(package, vocab)
+        return JiebaTokenizer.from_package(package, vocab)