mirror of
https://github.com/explosion/spaCy.git
synced 2025-10-02 09:56:39 +03:00
Try 'context' concept in ChineseTokenizer
This commit is contained in:
parent
f168822857
commit
6f0eeefefd
|
@ -71,9 +71,9 @@ class ChineseTokenizer(DummyTokenizer):
|
||||||
self.pkuseg_user_dict = pkuseg_user_dict
|
self.pkuseg_user_dict = pkuseg_user_dict
|
||||||
self.pkuseg_seg = None
|
self.pkuseg_seg = None
|
||||||
self.jieba_seg = None
|
self.jieba_seg = None
|
||||||
self.configure_segmenter(segmenter)
|
self.configure_segmenter(segmenter, _context=nlp._context)
|
||||||
|
|
||||||
def configure_segmenter(self, segmenter: str):
|
def configure_segmenter(self, segmenter: str, *, _context: str=""):
|
||||||
if segmenter not in Segmenter.values():
|
if segmenter not in Segmenter.values():
|
||||||
warn_msg = Warnings.W103.format(
|
warn_msg = Warnings.W103.format(
|
||||||
lang="Chinese",
|
lang="Chinese",
|
||||||
|
@ -84,10 +84,13 @@ class ChineseTokenizer(DummyTokenizer):
|
||||||
warnings.warn(warn_msg)
|
warnings.warn(warn_msg)
|
||||||
self.segmenter = Segmenter.char
|
self.segmenter = Segmenter.char
|
||||||
self.jieba_seg = try_jieba_import(self.segmenter)
|
self.jieba_seg = try_jieba_import(self.segmenter)
|
||||||
|
if _context == "loading":
|
||||||
|
self.pkuseg_seg = None
|
||||||
|
else:
|
||||||
self.pkuseg_seg = try_pkuseg_import(
|
self.pkuseg_seg = try_pkuseg_import(
|
||||||
self.segmenter,
|
self.segmenter,
|
||||||
pkuseg_model=self.pkuseg_model,
|
pkuseg_model=self.pkuseg_model,
|
||||||
pkuseg_user_dict=self.pkuseg_user_dict,
|
pkuseg_user_dict=self.pkuseg_user_dict
|
||||||
)
|
)
|
||||||
|
|
||||||
def __call__(self, text: str) -> Doc:
|
def __call__(self, text: str) -> Doc:
|
||||||
|
|
Loading…
Reference in New Issue
Block a user