From 10c930cc96b731d88f78998248d54dfe2bfd8934 Mon Sep 17 00:00:00 2001
From: Adriane Boyd
Date: Fri, 26 Feb 2021 09:48:14 +0100
Subject: [PATCH] Re-refactor Sentencizer with Pipe API (#7176)

Reapply the refactoring (#4721) so that `Sentencizer` uses the faster
`predict` and `set_annotations` for both `__call__` and `pipe`.
---
 spacy/pipeline/sentencizer.pyx | 18 ++----------------
 1 file changed, 2 insertions(+), 16 deletions(-)

diff --git a/spacy/pipeline/sentencizer.pyx b/spacy/pipeline/sentencizer.pyx
index b10cdd8e8..60102efcb 100644
--- a/spacy/pipeline/sentencizer.pyx
+++ b/spacy/pipeline/sentencizer.pyx
@@ -66,26 +66,12 @@ class Sentencizer(Pipe):
         """
         error_handler = self.get_error_handler()
         try:
-            self._call(doc)
+            tags = self.predict([doc])
+            self.set_annotations([doc], tags)
             return doc
         except Exception as e:
             error_handler(self.name, self, [doc], e)
 
-    def _call(self, doc):
-        start = 0
-        seen_period = False
-        for i, token in enumerate(doc):
-            is_in_punct_chars = token.text in self.punct_chars
-            token.is_sent_start = i == 0
-            if seen_period and not token.is_punct and not is_in_punct_chars:
-                doc[start].is_sent_start = True
-                start = token.i
-                seen_period = False
-            elif is_in_punct_chars:
-                seen_period = True
-        if start < len(doc):
-            doc[start].is_sent_start = True
-
     def predict(self, docs):
         """Apply the pipe to a batch of docs, without modifying them.
 