mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 01:04:34 +03:00
* If final token is whitespace, don't mark it as owning a trailing space. Fixes Issue #154
This commit is contained in:
parent
223d2b3484
commit
3e9961d2c4
|
@@ -130,7 +130,7 @@ cdef class Tokenizer:
|
||||||
cache_hit = self._try_cache(key, tokens)
|
cache_hit = self._try_cache(key, tokens)
|
||||||
if not cache_hit:
|
if not cache_hit:
|
||||||
self._tokenize(tokens, span, key)
|
self._tokenize(tokens, span, key)
|
||||||
tokens.c[tokens.length - 1].spacy = string[-1] == ' '
|
tokens.c[tokens.length - 1].spacy = string[-1] == ' ' and not in_ws
|
||||||
return tokens
|
return tokens
|
||||||
|
|
||||||
cdef int _try_cache(self, hash_t key, Doc tokens) except -1:
|
cdef int _try_cache(self, hash_t key, Doc tokens) except -1:
|
||||||
|
|
Loading…
Reference in New Issue
Block a user