@staticmethod
def decode_utf8_predictions(char_array):
    """Decode per-character class scores into a list of strings.

    char_array: An array whose first axis indexes rows (one per decoded
        word) and whose remaining elements can be reshaped to
        (n_rows, nr_char, 256), i.e. 256 scores per character slot.
    RETURNS (list): One decoded unicode string per row.

    The highest-scoring class of each slot is taken as a byte value.
    Slots alternate between filling the word from its start (even slots)
    and from its end (odd slots); the value 255 marks an unused slot.
    """
    nr_class = 256
    missing = 255
    # numpy cannot infer the -1 dimension for an empty array, so handle
    # "nothing to decode" before reshaping.
    if char_array.shape[0] == 0:
        return []
    byte_ids = char_array.reshape((char_array.shape[0], -1, nr_class))
    byte_ids = byte_ids.argmax(axis=-1)
    words = []
    for row in byte_ids:
        starts = [int(c) for c in row[::2] if c != missing]
        ends = [int(c) for c in row[1::2] if c != missing]
        # The end-filled slots were written last-byte-first, so reverse
        # them to restore reading order.
        word_bytes = bytearray(starts + ends[::-1])
        # Decode the byte sequence as a whole: mapping each byte through
        # chr() would garble multi-byte UTF-8 characters. Predictions may
        # form invalid or truncated sequences, so replace rather than raise.
        words.append(word_bytes.decode("utf8", "replace"))
    return words