mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-13 05:07:03 +03:00
Allow the vocabulary to grow to 10,000 entries, to prevent the cold-start problem.
This commit is contained in:
parent
0c7720e162
commit
ce4539dafd
|
@@ -262,9 +262,9 @@ cdef class Vocab:
|
||||||
|
|
||||||
cdef const LexemeC* _new_lexeme(self, Pool mem, unicode string) except NULL:
|
cdef const LexemeC* _new_lexeme(self, Pool mem, unicode string) except NULL:
|
||||||
cdef hash_t key
|
cdef hash_t key
|
||||||
cdef bint is_oov = mem is not self.mem
|
if len(string) < 3 or self.length < 10000:
|
||||||
if len(string) < 3:
|
|
||||||
mem = self.mem
|
mem = self.mem
|
||||||
|
cdef bint is_oov = mem is not self.mem
|
||||||
lex = <LexemeC*>mem.alloc(sizeof(LexemeC), 1)
|
lex = <LexemeC*>mem.alloc(sizeof(LexemeC), 1)
|
||||||
lex.orth = self.strings[string]
|
lex.orth = self.strings[string]
|
||||||
lex.length = len(string)
|
lex.length = len(string)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user