mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-10 19:57:17 +03:00
* Use tokens from Jieba library
This commit is contained in:
parent
3186379253
commit
1ede19c75a
|
@ -23,10 +23,11 @@ class CharacterTokenizer(Tokenizer):
|
||||||
def __call__(self, text):
|
def __call__(self, text):
|
||||||
return self.tokens_from_list(list(text))
|
return self.tokens_from_list(list(text))
|
||||||
|
|
||||||
|
|
||||||
class Chinese(Language):
|
class Chinese(Language):
|
||||||
lang = u'zh'
|
lang = u'zh'
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def default_tokenizer(cls, package, vocab):
|
def default_tokenizer(cls, package, vocab):
|
||||||
'''Return Jieba-wrapper tokenizer.'''
|
'''Return Jieba-wrapper tokenizer.'''
|
||||||
return CharacterTokenizer.from_package(package, vocab)
|
return JiebaTokenizer.from_package(package, vocab)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user