mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-24 17:06:29 +03:00
Small retokenizer fix (#4174)
This commit is contained in:
parent
a8752a569d
commit
73b38c33e4
|
@ -388,6 +388,7 @@ def _split(Doc doc, int token_index, orths, heads, attrs):
|
||||||
cdef const LexemeC* lex
|
cdef const LexemeC* lex
|
||||||
cdef TokenC* token
|
cdef TokenC* token
|
||||||
cdef TokenC orig_token = doc.c[token_index]
|
cdef TokenC orig_token = doc.c[token_index]
|
||||||
|
cdef int orig_length = len(doc)
|
||||||
|
|
||||||
if(len(heads) != nb_subtokens):
|
if(len(heads) != nb_subtokens):
|
||||||
raise ValueError(Errors.E115)
|
raise ValueError(Errors.E115)
|
||||||
|
@ -408,7 +409,7 @@ def _split(Doc doc, int token_index, orths, heads, attrs):
|
||||||
if to_process_tensor:
|
if to_process_tensor:
|
||||||
xp = get_array_module(doc.tensor)
|
xp = get_array_module(doc.tensor)
|
||||||
doc.tensor = xp.append(doc.tensor, xp.zeros((nb_subtokens,doc.tensor.shape[1]), dtype="float32"), axis=0)
|
doc.tensor = xp.append(doc.tensor, xp.zeros((nb_subtokens,doc.tensor.shape[1]), dtype="float32"), axis=0)
|
||||||
for token_to_move in range(doc.length - 1, token_index, -1):
|
for token_to_move in range(orig_length - 1, token_index, -1):
|
||||||
doc.c[token_to_move + nb_subtokens - 1] = doc.c[token_to_move]
|
doc.c[token_to_move + nb_subtokens - 1] = doc.c[token_to_move]
|
||||||
if to_process_tensor:
|
if to_process_tensor:
|
||||||
doc.tensor[token_to_move + nb_subtokens - 1] = doc.tensor[token_to_move]
|
doc.tensor[token_to_move + nb_subtokens - 1] = doc.tensor[token_to_move]
|
||||||
|
|
Loading…
Reference in New Issue
Block a user