From 00febda2e3d22d2f0fea2ca18f44bc1de491feec Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Thu, 16 Aug 2018 01:04:34 +0200
Subject: [PATCH] Improve alignment around quotes

---
 spacy/_align.pyx | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/spacy/_align.pyx b/spacy/_align.pyx
index 07b6efbd4..8ae7cdf4e 100644
--- a/spacy/_align.pyx
+++ b/spacy/_align.pyx
@@ -186,6 +186,10 @@ def _convert_sequence(seq):
     cdef np.ndarray output = numpy.zeros((len(seq),), dtype='uint32')
     cdef bytes item_bytes
     for i, item in enumerate(seq):
+        if item == "``":
+            item = '"'
+        elif item == "''":
+            item = '"'
         if isinstance(item, unicode):
             item_bytes = item.encode('utf8')
         else: