mirror of https://github.com/explosion/spaCy.git, synced 2024-12-24 00:46:28 +03:00
Small retokenizer fix (#4174)
This commit is contained in:
parent a8752a569d
commit 73b38c33e4
@@ -388,6 +388,7 @@ def _split(Doc doc, int token_index, orths, heads, attrs):
     cdef const LexemeC* lex
     cdef TokenC* token
     cdef TokenC orig_token = doc.c[token_index]
+    cdef int orig_length = len(doc)

     if(len(heads) != nb_subtokens):
         raise ValueError(Errors.E115)
@@ -408,7 +409,7 @@ def _split(Doc doc, int token_index, orths, heads, attrs):
     if to_process_tensor:
         xp = get_array_module(doc.tensor)
         doc.tensor = xp.append(doc.tensor, xp.zeros((nb_subtokens,doc.tensor.shape[1]), dtype="float32"), axis=0)
-    for token_to_move in range(doc.length - 1, token_index, -1):
+    for token_to_move in range(orig_length - 1, token_index, -1):
         doc.c[token_to_move + nb_subtokens - 1] = doc.c[token_to_move]
         if to_process_tensor:
             doc.tensor[token_to_move + nb_subtokens - 1] = doc.tensor[token_to_move]
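The fix is small but real: by the time this shift loop runs, `doc.length` has already been updated to include the new subtokens, so bounding the loop with `doc.length - 1` walked past the original tokens and copied freshly added, uninitialized slots. Capturing `orig_length` at the top of `_split` keeps the loop over exactly the pre-split tokens. Below is a minimal sketch of the retokenizer split that `_split` backs, assuming a blank English pipeline; the sentence, token indices, and head choices are illustrative assumptions, not taken from the commit.

    # A minimal, illustrative sketch of the split path exercised by _split;
    # the example sentence and heads are assumptions, not from the commit.
    import spacy

    nlp = spacy.blank("en")
    doc = nlp("I live in NewYork right now")

    with doc.retokenize() as retokenizer:
        # Head of "New" is the second new subtoken ("York", index 1 within
        # the split); head of "York" is the existing token "in".
        heads = [(doc[3], 1), doc[2]]
        retokenizer.split(doc[3], ["New", "York"], heads=heads)

    # Tokens to the right of the split ("right", "now") are the ones _split
    # shifts by nb_subtokens - 1; that shift loop is the one the fix re-bounds.
    print([t.text for t in doc])
    # ['I', 'live', 'in', 'New', 'York', 'right', 'now']

A split like this only surfaced the bug when tokens followed the split point, since the loop from the stale upper bound down to `token_index` is otherwise empty.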