mirror of
https://github.com/explosion/spaCy.git
synced 2025-02-05 14:10:34 +03:00
* Use Jieba tokenizer in Chinese class
This commit is contained in:
parent
9bfe20cac9
commit
b6ccd8d76a
|
@ -1,5 +1,13 @@
|
||||||
from ..language import Language
|
from ..language import Language
|
||||||
|
from .jieba import JiebaTokenizer
|
||||||
|
|
||||||
|
|
||||||
class Chinese(Language):
    """Language subclass for Chinese, identified by the code ``zh``."""

    lang = u'zh'

    @classmethod
    def default_tokenizer(cls, package, vocab):
        """Return the Jieba-wrapper tokenizer for this language.

        Both arguments are forwarded unchanged to
        ``JiebaTokenizer.from_package``; their expected types are not
        visible in this file (presumably a data package and a vocabulary;
        confirm against ``JiebaTokenizer``).
        """
        tokenizer = JiebaTokenizer.from_package(package, vocab)
        return tokenizer
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user