mirror of
https://github.com/explosion/spaCy.git
synced 2025-02-04 21:50:35 +03:00
* Use tokens from Jieba library
This commit is contained in:
parent
3186379253
commit
1ede19c75a
|
@ -23,10 +23,11 @@ class CharacterTokenizer(Tokenizer):
|
|||
def __call__(self, text):
    '''Tokenize ``text`` one character at a time.

    The input is split into a list of its individual characters, and
    that list is handed to ``self.tokens_from_list``; whatever that
    method returns is returned unchanged.
    '''
    characters = list(text)
    return self.tokens_from_list(characters)
|
||||
|
||||
|
||||
class Chinese(Language):
    '''Language subclass whose language code is ``zh``.'''

    lang = u'zh'

    @classmethod
    def default_tokenizer(cls, package, vocab):
        '''Return Jieba-wrapper tokenizer.

        ``package`` and ``vocab`` are forwarded unchanged to
        ``JiebaTokenizer.from_package``, and its result is returned.
        '''
        # A leftover ``return CharacterTokenizer.from_package(...)`` used to
        # precede this line, which made the Jieba tokenizer unreachable and
        # contradicted the docstring. Only the Jieba path is kept.
        return JiebaTokenizer.from_package(package, vocab)
|
||||
|
|
Loading…
Reference in New Issue
Block a user