mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-10 19:57:17 +03:00
Changed tokenizer to still add the infix when infix_start equals the current start offset
This commit is contained in:
parent
8be3392302
commit
33b0f86de3
|
@ -241,11 +241,10 @@ cdef class Tokenizer:
|
|||
for match in matches:
|
||||
infix_start = match.start()
|
||||
infix_end = match.end()
|
||||
if infix_start == start:
|
||||
continue
|
||||
|
||||
span = string[start:infix_start]
|
||||
tokens.push_back(self.vocab.get(tokens.mem, span), False)
|
||||
if infix_start != start:
|
||||
span = string[start:infix_start]
|
||||
tokens.push_back(self.vocab.get(tokens.mem, span), False)
|
||||
|
||||
if infix_start != infix_end:
|
||||
# If infix_start != infix_end, it means the infix
|
||||
|
|
Loading…
Reference in New Issue
Block a user