Allow the vocabulary to grow to 10,000 entries, to prevent the cold-start problem.

This commit is contained in:
Matthew Honnibal 2016-12-27 21:03:45 +01:00
parent 0c7720e162
commit ce4539dafd

View File

@@ -262,9 +262,9 @@ cdef class Vocab:
cdef const LexemeC* _new_lexeme(self, Pool mem, unicode string) except NULL:
cdef hash_t key
cdef bint is_oov = mem is not self.mem
if len(string) < 3:
if len(string) < 3 or self.length < 10000:
mem = self.mem
cdef bint is_oov = mem is not self.mem
lex = <LexemeC*>mem.alloc(sizeof(LexemeC), 1)
lex.orth = self.strings[string]
lex.length = len(string)