mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 09:57:26 +03:00 
			
		
		
		
	Allow the vocabulary to grow to 10,000, to prevent the cold-start problem.
This commit is contained in:
		
							parent
							
								
									0c7720e162
								
							
						
					
					
						commit
						ce4539dafd
					
				| 
						 | 
					@ -262,9 +262,9 @@ cdef class Vocab:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    cdef const LexemeC* _new_lexeme(self, Pool mem, unicode string) except NULL:
 | 
					    cdef const LexemeC* _new_lexeme(self, Pool mem, unicode string) except NULL:
 | 
				
			||||||
        cdef hash_t key
 | 
					        cdef hash_t key
 | 
				
			||||||
        cdef bint is_oov = mem is not self.mem
 | 
					        if len(string) < 3 or self.length < 10000:
 | 
				
			||||||
        if len(string) < 3:
 | 
					 | 
				
			||||||
            mem = self.mem
 | 
					            mem = self.mem
 | 
				
			||||||
 | 
					        cdef bint is_oov = mem is not self.mem
 | 
				
			||||||
        lex = <LexemeC*>mem.alloc(sizeof(LexemeC), 1)
 | 
					        lex = <LexemeC*>mem.alloc(sizeof(LexemeC), 1)
 | 
				
			||||||
        lex.orth = self.strings[string]
 | 
					        lex.orth = self.strings[string]
 | 
				
			||||||
        lex.length = len(string)
 | 
					        lex.length = len(string)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user