mirror of
https://github.com/explosion/spaCy.git
synced 2025-10-02 09:56:39 +03:00
Try 'context' concept in ChineseTokenizer
This commit is contained in:
parent
f168822857
commit
6f0eeefefd
|
@@ -71,9 +71,9 @@ class ChineseTokenizer(DummyTokenizer):
|
|||
self.pkuseg_user_dict = pkuseg_user_dict
|
||||
self.pkuseg_seg = None
|
||||
self.jieba_seg = None
|
||||
self.configure_segmenter(segmenter)
|
||||
self.configure_segmenter(segmenter, _context=nlp._context)
|
||||
|
||||
def configure_segmenter(self, segmenter: str):
|
||||
def configure_segmenter(self, segmenter: str, *, _context: str=""):
|
||||
if segmenter not in Segmenter.values():
|
||||
warn_msg = Warnings.W103.format(
|
||||
lang="Chinese",
|
||||
|
@@ -84,10 +84,13 @@ class ChineseTokenizer(DummyTokenizer):
|
|||
warnings.warn(warn_msg)
|
||||
self.segmenter = Segmenter.char
|
||||
self.jieba_seg = try_jieba_import(self.segmenter)
|
||||
if _context == "loading":
|
||||
self.pkuseg_seg = None
|
||||
else:
|
||||
self.pkuseg_seg = try_pkuseg_import(
|
||||
self.segmenter,
|
||||
pkuseg_model=self.pkuseg_model,
|
||||
pkuseg_user_dict=self.pkuseg_user_dict,
|
||||
pkuseg_user_dict=self.pkuseg_user_dict
|
||||
)
|
||||
|
||||
def __call__(self, text: str) -> Doc:
|
||||
|
|
Loading…
Reference in New Issue
Block a user