From 6117adcd6d2eaa7988ab3e9bfc0789881f1afe16 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Mon, 25 Jan 2021 00:23:02 +1100
Subject: [PATCH] Make vocab always own lexemes

---
 spacy/vocab.pyx | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/spacy/vocab.pyx b/spacy/vocab.pyx
index 8359d8452..e8ed1b61c 100644
--- a/spacy/vocab.pyx
+++ b/spacy/vocab.pyx
@@ -161,8 +161,11 @@ cdef class Vocab:
         return self._new_lexeme(mem, self.strings[orth])
 
     cdef const LexemeC* _new_lexeme(self, Pool mem, unicode string) except NULL:
-        if len(string) < 3 or self.length < 10000:
-            mem = self.mem
+        #if len(string) < 3 or self.length < 10000:
+        #    mem = self.mem
+        # TODO: Experiment with never allowing the Doc to own lexemes, to see
+        # if it solves the Doc.copy() issue.
+        mem = self.mem
         cdef bint is_oov = mem is not self.mem
         lex = mem.alloc(1, sizeof(LexemeC))
         lex.orth = self.strings.add(string)