diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index 09d8a439d..66654482e 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -132,12 +132,6 @@ cdef class Doc: # must be created. self.push_back( self.vocab.get(self.mem, orth), has_space) - - def __dealloc__(self): - if self.mem is not None \ - and self.vocab is not None \ - and self.vocab.strings is not None: - self.vocab.strings.remove_oov_map(self.mem) def __getitem__(self, object i): ''' @@ -606,14 +600,14 @@ cdef class Doc: if tag in self.vocab.morphology.tag_map: self.vocab.morphology.assign_tag(token, tag) else: - token.tag = self.vocab.strings.intern(tag) - token.lemma = self.vocab.strings.intern(lemma, mem=self.mem) + token.tag = self.vocab.strings[tag] + token.lemma = self.vocab.strings[lemma] if ent_type == 'O': token.ent_iob = 2 token.ent_type = 0 else: token.ent_iob = 3 - token.ent_type = self.vocab.strings.intern(ent_type) + token.ent_type = self.vocab.strings[ent_type] # Begin by setting all the head indices to absolute token positions # This is easier to work with for now than the offsets # Before thinking of something simpler, beware the case where a dependency diff --git a/spacy/tokens/token.pyx b/spacy/tokens/token.pyx index 50a62fb0b..9320cb85a 100644 --- a/spacy/tokens/token.pyx +++ b/spacy/tokens/token.pyx @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from libc.string cimport memcpy from cpython.mem cimport PyMem_Malloc, PyMem_Free # Compiler crashes on memory view coercion without this. Should report bug. @@ -440,19 +438,19 @@ cdef class Token: property orth_: def __get__(self): - return self.vocab.strings.decode_int(self.c.lex.orth, mem=self.doc.mem) + return self.vocab.strings.decode_int(self.c.lex.orth, mem=self.mem) property lower_: def __get__(self): - return self.vocab.strings.decode_int(self.c.lex.lower, mem=self.doc.mem) + return self.vocab.strings.decode_int(self.c.lex.lower, mem=self.mem) property norm_: def __get__(self): - return self.vocab.strings.decode_int(self.c.lex.norm, mem=self.doc.mem) + return self.vocab.strings.decode_int(self.c.lex.norm, mem=self.mem) property shape_: def __get__(self): - return self.vocab.strings.decode_int(self.c.lex.shape, mem=self.doc.mem) + return self.vocab.strings.decode_int(self.c.lex.shape, mem=self.mem) property prefix_: def __get__(self): @@ -464,11 +462,11 @@ cdef class Token: property lang_: def __get__(self): - return self.vocab.strings.decode_int(self.c.lex.lang, mem=self.doc.mem) + return self.vocab.strings.decode_int(self.c.lex.lang, mem=self.mem) property lemma_: def __get__(self): - return self.vocab.strings.decode_int(self.c.lemma, mem=self.doc.mem) + return self.vocab.strings.decode_int(self.c.lemma, mem=self.mem) property pos_: def __get__(self): @@ -476,7 +474,7 @@ cdef class Token: property tag_: def __get__(self): - return self.vocab.strings.decode_int(self.c.tag, mem=self.doc.mem) + return self.vocab.strings.decode_int(self.c.tag, mem=self.mem) property dep_: def __get__(self):