From 6f0eeefefd76712e98e458b37e28d28ad19eaff5 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Sat, 26 Sep 2020 23:19:48 +0200
Subject: [PATCH] Try 'context' concept in ChineseTokenizer

---
 spacy/lang/zh/__init__.py | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/spacy/lang/zh/__init__.py b/spacy/lang/zh/__init__.py
index 5d3bd2a96..4b97548bb 100644
--- a/spacy/lang/zh/__init__.py
+++ b/spacy/lang/zh/__init__.py
@@ -71,9 +71,9 @@ class ChineseTokenizer(DummyTokenizer):
         self.pkuseg_user_dict = pkuseg_user_dict
         self.pkuseg_seg = None
         self.jieba_seg = None
-        self.configure_segmenter(segmenter)
+        self.configure_segmenter(segmenter, _context=nlp._context)
 
-    def configure_segmenter(self, segmenter: str):
+    def configure_segmenter(self, segmenter: str, *, _context: str=""):
         if segmenter not in Segmenter.values():
             warn_msg = Warnings.W103.format(
                 lang="Chinese",
@@ -84,11 +84,14 @@ class ChineseTokenizer(DummyTokenizer):
             warnings.warn(warn_msg)
             self.segmenter = Segmenter.char
         self.jieba_seg = try_jieba_import(self.segmenter)
-        self.pkuseg_seg = try_pkuseg_import(
-            self.segmenter,
-            pkuseg_model=self.pkuseg_model,
-            pkuseg_user_dict=self.pkuseg_user_dict,
-        )
+        if _context == "loading":
+            self.pkuseg_seg = None
+        else:
+            self.pkuseg_seg = try_pkuseg_import(
+                self.segmenter,
+                pkuseg_model=self.pkuseg_model,
+                pkuseg_user_dict=self.pkuseg_user_dict
+            )
 
     def __call__(self, text: str) -> Doc:
         if self.segmenter == Segmenter.jieba: