From 3e9961d2c4810fd91cc383955e5e691bdaeb2e74 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Sat, 16 Jan 2016 17:08:59 +0100
Subject: [PATCH] * If final token is whitespace, don't mark it as owning a trailing space. Fixes Issue #154

---
 spacy/tokenizer.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/tokenizer.pyx b/spacy/tokenizer.pyx
index 593d0dc7d..b6dd9f7f5 100644
--- a/spacy/tokenizer.pyx
+++ b/spacy/tokenizer.pyx
@@ -130,7 +130,7 @@ cdef class Tokenizer:
             cache_hit = self._try_cache(key, tokens)
             if not cache_hit:
                 self._tokenize(tokens, span, key)
-            tokens.c[tokens.length - 1].spacy = string[-1] == ' '
+            tokens.c[tokens.length - 1].spacy = string[-1] == ' ' and not in_ws
         return tokens
 
     cdef int _try_cache(self, hash_t key, Doc tokens) except -1: