mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-27 02:16:32 +03:00
Changed tokenizer to still add the infix when infix_start equals the current start offset (only the empty preceding span is skipped)
This commit is contained in:
parent
8be3392302
commit
33b0f86de3
|
@@ -241,9 +241,8 @@ cdef class Tokenizer:
|
||||||
for match in matches:
|
for match in matches:
|
||||||
infix_start = match.start()
|
infix_start = match.start()
|
||||||
infix_end = match.end()
|
infix_end = match.end()
|
||||||
if infix_start == start:
|
|
||||||
continue
|
|
||||||
|
|
||||||
|
if infix_start != start:
|
||||||
span = string[start:infix_start]
|
span = string[start:infix_start]
|
||||||
tokens.push_back(self.vocab.get(tokens.mem, span), False)
|
tokens.push_back(self.vocab.get(tokens.mem, span), False)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user