Revert "Changes to Doc and Token for new string store scheme"

This reverts commit 99de44d864.
This commit is contained in:
Matthew Honnibal 2016-09-30 20:11:15 +02:00
parent bd7fe6420c
commit 6736977d82
2 changed files with 10 additions and 18 deletions

View File

@ -133,12 +133,6 @@ cdef class Doc:
self.push_back( self.push_back(
<const LexemeC*>self.vocab.get(self.mem, orth), has_space) <const LexemeC*>self.vocab.get(self.mem, orth), has_space)
def __dealloc__(self):
if self.mem is not None \
and self.vocab is not None \
and self.vocab.strings is not None:
self.vocab.strings.remove_oov_map(self.mem)
def __getitem__(self, object i): def __getitem__(self, object i):
''' '''
doc[i] doc[i]
@ -606,14 +600,14 @@ cdef class Doc:
if tag in self.vocab.morphology.tag_map: if tag in self.vocab.morphology.tag_map:
self.vocab.morphology.assign_tag(token, tag) self.vocab.morphology.assign_tag(token, tag)
else: else:
token.tag = self.vocab.strings.intern(tag) token.tag = self.vocab.strings[tag]
token.lemma = self.vocab.strings.intern(lemma, mem=self.mem) token.lemma = self.vocab.strings[lemma]
if ent_type == 'O': if ent_type == 'O':
token.ent_iob = 2 token.ent_iob = 2
token.ent_type = 0 token.ent_type = 0
else: else:
token.ent_iob = 3 token.ent_iob = 3
token.ent_type = self.vocab.strings.intern(ent_type) token.ent_type = self.vocab.strings[ent_type]
# Begin by setting all the head indices to absolute token positions # Begin by setting all the head indices to absolute token positions
# This is easier to work with for now than the offsets # This is easier to work with for now than the offsets
# Before thinking of something simpler, beware the case where a dependency # Before thinking of something simpler, beware the case where a dependency

View File

@ -1,5 +1,3 @@
from __future__ import unicode_literals
from libc.string cimport memcpy from libc.string cimport memcpy
from cpython.mem cimport PyMem_Malloc, PyMem_Free from cpython.mem cimport PyMem_Malloc, PyMem_Free
# Compiler crashes on memory view coercion without this. Should report bug. # Compiler crashes on memory view coercion without this. Should report bug.
@ -440,19 +438,19 @@ cdef class Token:
property orth_: property orth_:
def __get__(self): def __get__(self):
return self.vocab.strings.decode_int(self.c.lex.orth, mem=self.doc.mem) return self.vocab.strings.decode_int(self.c.lex.orth, mem=self.mem)
property lower_: property lower_:
def __get__(self): def __get__(self):
return self.vocab.strings.decode_int(self.c.lex.lower, mem=self.doc.mem) return self.vocab.strings.decode_int(self.c.lex.lower, mem=self.mem)
property norm_: property norm_:
def __get__(self): def __get__(self):
return self.vocab.strings.decode_int(self.c.lex.norm, mem=self.doc.mem) return self.vocab.strings.decode_int(self.c.lex.norm, mem=self.mem)
property shape_: property shape_:
def __get__(self): def __get__(self):
return self.vocab.strings.decode_int(self.c.lex.shape, mem=self.doc.mem) return self.vocab.strings.decode_int(self.c.lex.shape, mem=self.mem)
property prefix_: property prefix_:
def __get__(self): def __get__(self):
@ -464,11 +462,11 @@ cdef class Token:
property lang_: property lang_:
def __get__(self): def __get__(self):
return self.vocab.strings.decode_int(self.c.lex.lang, mem=self.doc.mem) return self.vocab.strings.decode_int(self.c.lex.lang, mem=self.mem)
property lemma_: property lemma_:
def __get__(self): def __get__(self):
return self.vocab.strings.decode_int(self.c.lemma, mem=self.doc.mem) return self.vocab.strings.decode_int(self.c.lemma, mem=self.mem)
property pos_: property pos_:
def __get__(self): def __get__(self):
@ -476,7 +474,7 @@ cdef class Token:
property tag_: property tag_:
def __get__(self): def __get__(self):
return self.vocab.strings.decode_int(self.c.tag, mem=self.doc.mem) return self.vocab.strings.decode_int(self.c.tag, mem=self.mem)
property dep_: property dep_:
def __get__(self): def __get__(self):