Mirror of https://github.com/explosion/spaCy.git (synced 2025-09-22 12:06:43 +03:00)
Set lex ID correctly for new tokens in Vocab
This commit is contained in:
Parent: fe3c42a06b
Commit: 2f169fdb0a
|
@ -1,4 +1,5 @@
|
||||||
# coding: utf8
|
# coding: utf8
|
||||||
|
# cython: profile=True
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import numpy
|
import numpy
|
||||||
|
@ -154,7 +155,7 @@ cdef class Vocab:
|
||||||
lex = <LexemeC*>mem.alloc(sizeof(LexemeC), 1)
|
lex = <LexemeC*>mem.alloc(sizeof(LexemeC), 1)
|
||||||
lex.orth = self.strings.add(string)
|
lex.orth = self.strings.add(string)
|
||||||
lex.length = len(string)
|
lex.length = len(string)
|
||||||
lex.id = self.length
|
lex.id = self.vectors.key2row.get(lex.orth, 0)
|
||||||
if self.lex_attr_getters is not None:
|
if self.lex_attr_getters is not None:
|
||||||
for attr, func in self.lex_attr_getters.items():
|
for attr, func in self.lex_attr_getters.items():
|
||||||
value = func(string)
|
value = func(string)
|
||||||
|
@ -164,9 +165,7 @@ cdef class Vocab:
|
||||||
lex.prob = value
|
lex.prob = value
|
||||||
elif value is not None:
|
elif value is not None:
|
||||||
Lexeme.set_struct_attr(lex, attr, value)
|
Lexeme.set_struct_attr(lex, attr, value)
|
||||||
if is_oov:
|
if not is_oov:
|
||||||
lex.id = 0
|
|
||||||
else:
|
|
||||||
key = hash_string(string)
|
key = hash_string(string)
|
||||||
self._add_lex_to_vocab(key, lex)
|
self._add_lex_to_vocab(key, lex)
|
||||||
assert lex != NULL, string
|
assert lex != NULL, string
|
||||||
|
|
Loading…
Reference in New Issue
Block a user