mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-24 17:06:29 +03:00
Changes to Doc and Token for new string store scheme
This commit is contained in:
parent
78f19baafa
commit
99de44d864
|
@ -132,6 +132,12 @@ cdef class Doc:
|
|||
# must be created.
|
||||
self.push_back(
|
||||
<const LexemeC*>self.vocab.get(self.mem, orth), has_space)
|
||||
|
||||
def __dealloc__(self):
|
||||
if self.mem is not None \
|
||||
and self.vocab is not None \
|
||||
and self.vocab.strings is not None:
|
||||
self.vocab.strings.remove_oov_map(self.mem)
|
||||
|
||||
def __getitem__(self, object i):
|
||||
'''
|
||||
|
@ -600,14 +606,14 @@ cdef class Doc:
|
|||
if tag in self.vocab.morphology.tag_map:
|
||||
self.vocab.morphology.assign_tag(token, tag)
|
||||
else:
|
||||
token.tag = self.vocab.strings[tag]
|
||||
token.lemma = self.vocab.strings[lemma]
|
||||
token.tag = self.vocab.strings.intern(tag)
|
||||
token.lemma = self.vocab.strings.intern(lemma, mem=self.mem)
|
||||
if ent_type == 'O':
|
||||
token.ent_iob = 2
|
||||
token.ent_type = 0
|
||||
else:
|
||||
token.ent_iob = 3
|
||||
token.ent_type = self.vocab.strings[ent_type]
|
||||
token.ent_type = self.vocab.strings.intern(ent_type)
|
||||
# Begin by setting all the head indices to absolute token positions
|
||||
# This is easier to work with for now than the offsets
|
||||
# Before thinking of something simpler, beware the case where a dependency
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
from libc.string cimport memcpy
|
||||
from cpython.mem cimport PyMem_Malloc, PyMem_Free
|
||||
# Compiler crashes on memory view coercion without this. Should report bug.
|
||||
|
@ -438,19 +440,19 @@ cdef class Token:
|
|||
|
||||
property orth_:
|
||||
def __get__(self):
|
||||
return self.vocab.strings.decode_int(self.c.lex.orth, mem=self.mem)
|
||||
return self.vocab.strings.decode_int(self.c.lex.orth, mem=self.doc.mem)
|
||||
|
||||
property lower_:
|
||||
def __get__(self):
|
||||
return self.vocab.strings.decode_int(self.c.lex.lower, mem=self.mem)
|
||||
return self.vocab.strings.decode_int(self.c.lex.lower, mem=self.doc.mem)
|
||||
|
||||
property norm_:
|
||||
def __get__(self):
|
||||
return self.vocab.strings.decode_int(self.c.lex.norm, mem=self.mem)
|
||||
return self.vocab.strings.decode_int(self.c.lex.norm, mem=self.doc.mem)
|
||||
|
||||
property shape_:
|
||||
def __get__(self):
|
||||
return self.vocab.strings.decode_int(self.c.lex.shape, mem=self.mem)
|
||||
return self.vocab.strings.decode_int(self.c.lex.shape, mem=self.doc.mem)
|
||||
|
||||
property prefix_:
|
||||
def __get__(self):
|
||||
|
@ -462,11 +464,11 @@ cdef class Token:
|
|||
|
||||
property lang_:
|
||||
def __get__(self):
|
||||
return self.vocab.strings.decode_int(self.c.lex.lang, mem=self.mem)
|
||||
return self.vocab.strings.decode_int(self.c.lex.lang, mem=self.doc.mem)
|
||||
|
||||
property lemma_:
|
||||
def __get__(self):
|
||||
return self.vocab.strings.decode_int(self.c.lemma, mem=self.mem)
|
||||
return self.vocab.strings.decode_int(self.c.lemma, mem=self.doc.mem)
|
||||
|
||||
property pos_:
|
||||
def __get__(self):
|
||||
|
@ -474,7 +476,7 @@ cdef class Token:
|
|||
|
||||
property tag_:
|
||||
def __get__(self):
|
||||
return self.vocab.strings.decode_int(self.c.tag, mem=self.mem)
|
||||
return self.vocab.strings.decode_int(self.c.tag, mem=self.doc.mem)
|
||||
|
||||
property dep_:
|
||||
def __get__(self):
|
||||
|
|
Loading…
Reference in New Issue
Block a user