From 0afb54ac93e4a0e292b2d408e9ce7daeb524bcec Mon Sep 17 00:00:00 2001
From: Koichi Yasuoka
Date: Wed, 9 Dec 2020 04:02:23 +0900
Subject: [PATCH] JapaneseTokenizer.pipe added (#6515)

* JapaneseTokenizer.pipe added

For [spacymoji](https://spacy.io/universe/project/spacymoji) with `Japanese()`.

* DummyTokenizer.pipe added instead
---
 spacy/util.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/spacy/util.py b/spacy/util.py
index 735bfc53b..95a9f087f 100644
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -838,6 +838,13 @@ class SimpleFrozenDict(dict):
 
 
 class DummyTokenizer(object):
+    def __call__(self, text):
+        raise NotImplementedError
+
+    def pipe(self, texts, **kwargs):
+        for text in texts:
+            yield self(text)
+
     # add dummy methods for to_bytes, from_bytes, to_disk and from_disk to
     # allow serialization (see #1557)
     def to_bytes(self, **kwargs):
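
For context, here is a minimal sketch of what the added `pipe` method enables. It is not part of the patch: the `WhitespaceTokenizer` subclass and the sample texts below are illustrative assumptions, not spaCy API. Any custom tokenizer built on `DummyTokenizer` (such as `JapaneseTokenizer`) now inherits a `pipe` method, which is what extensions like spacymoji call when streaming texts through a `Japanese()` pipeline.

```python
# Minimal usage sketch, NOT part of the patch above. It assumes the patched
# spacy.util.DummyTokenizer; WhitespaceTokenizer is a hypothetical subclass
# used purely for illustration.
from spacy.tokens import Doc
from spacy.util import DummyTokenizer
from spacy.vocab import Vocab


class WhitespaceTokenizer(DummyTokenizer):
    """Toy custom tokenizer: only __call__ needs to be implemented."""

    def __init__(self, vocab):
        self.vocab = vocab

    def __call__(self, text):
        # Build a Doc from whitespace-separated words.
        return Doc(self.vocab, words=text.split())


tokenizer = WhitespaceTokenizer(Vocab())

# pipe() is inherited from DummyTokenizer: it simply calls the tokenizer on
# each text in turn, so code that expects nlp.tokenizer.pipe(...) (e.g.
# spacymoji) works with custom tokenizers as well.
for doc in tokenizer.pipe(["Hello world", "Another text"]):
    print([token.text for token in doc])
```

The default implementation just falls back to `__call__` for each text, so subclasses only need to override `pipe` if they can tokenize batches more efficiently.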