mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 09:56:28 +03:00
Fix token_match in tokenizer
This commit is contained in:
parent
724831b066
commit
cf693f0eae
|
@@ -404,9 +404,7 @@ cdef class Tokenizer:
|
|||
cdef unicode minus_suf
|
||||
cdef size_t last_size = 0
|
||||
while string and len(string) != last_size:
|
||||
if self.token_match and self.token_match(string) \
|
||||
and not self.find_prefix(string) \
|
||||
and not self.find_suffix(string):
|
||||
if self.token_match and self.token_match(string):
|
||||
break
|
||||
if with_special_cases and self._specials.get(hash_string(string)) != NULL:
|
||||
break
|
||||
|
@@ -679,6 +677,8 @@ cdef class Tokenizer:
|
|||
break
|
||||
suffixes.append(("SUFFIX", substring[split:]))
|
||||
substring = substring[:split]
|
||||
if len(substring) == 0:
|
||||
continue
|
||||
if token_match(substring):
|
||||
tokens.append(("TOKEN_MATCH", substring))
|
||||
substring = ''
|
||||
|
|
Loading…
Reference in New Issue
Block a user