* If final token is whitespace, don't mark it as owning a trailing space. Fixes Issue #154

This commit is contained in:
Matthew Honnibal 2016-01-16 17:08:59 +01:00
parent 223d2b3484
commit 3e9961d2c4

View File

@@ -130,7 +130,7 @@ cdef class Tokenizer:
cache_hit = self._try_cache(key, tokens)
if not cache_hit:
self._tokenize(tokens, span, key)
- tokens.c[tokens.length - 1].spacy = string[-1] == ' '
+ tokens.c[tokens.length - 1].spacy = string[-1] == ' ' and not in_ws
return tokens
cdef int _try_cache(self, hash_t key, Doc tokens) except -1: