mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-11 17:56:30 +03:00
Revert "Changes to Doc and Token for new string store scheme"
This reverts commit 99de44d864.
This commit is contained in:
parent
bd7fe6420c
commit
6736977d82
|
@ -133,12 +133,6 @@ cdef class Doc:
|
|||
self.push_back(
|
||||
<const LexemeC*>self.vocab.get(self.mem, orth), has_space)
|
||||
|
||||
def __dealloc__(self):
|
||||
if self.mem is not None \
|
||||
and self.vocab is not None \
|
||||
and self.vocab.strings is not None:
|
||||
self.vocab.strings.remove_oov_map(self.mem)
|
||||
|
||||
def __getitem__(self, object i):
|
||||
'''
|
||||
doc[i]
|
||||
|
@ -606,14 +600,14 @@ cdef class Doc:
|
|||
if tag in self.vocab.morphology.tag_map:
|
||||
self.vocab.morphology.assign_tag(token, tag)
|
||||
else:
|
||||
token.tag = self.vocab.strings.intern(tag)
|
||||
token.lemma = self.vocab.strings.intern(lemma, mem=self.mem)
|
||||
token.tag = self.vocab.strings[tag]
|
||||
token.lemma = self.vocab.strings[lemma]
|
||||
if ent_type == 'O':
|
||||
token.ent_iob = 2
|
||||
token.ent_type = 0
|
||||
else:
|
||||
token.ent_iob = 3
|
||||
token.ent_type = self.vocab.strings.intern(ent_type)
|
||||
token.ent_type = self.vocab.strings[ent_type]
|
||||
# Begin by setting all the head indices to absolute token positions
|
||||
# This is easier to work with for now than the offsets
|
||||
# Before thinking of something simpler, beware the case where a dependency
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
from libc.string cimport memcpy
|
||||
from cpython.mem cimport PyMem_Malloc, PyMem_Free
|
||||
# Compiler crashes on memory view coercion without this. Should report bug.
|
||||
|
@ -440,19 +438,19 @@ cdef class Token:
|
|||
|
||||
property orth_:
|
||||
def __get__(self):
|
||||
return self.vocab.strings.decode_int(self.c.lex.orth, mem=self.doc.mem)
|
||||
return self.vocab.strings.decode_int(self.c.lex.orth, mem=self.mem)
|
||||
|
||||
property lower_:
|
||||
def __get__(self):
|
||||
return self.vocab.strings.decode_int(self.c.lex.lower, mem=self.doc.mem)
|
||||
return self.vocab.strings.decode_int(self.c.lex.lower, mem=self.mem)
|
||||
|
||||
property norm_:
|
||||
def __get__(self):
|
||||
return self.vocab.strings.decode_int(self.c.lex.norm, mem=self.doc.mem)
|
||||
return self.vocab.strings.decode_int(self.c.lex.norm, mem=self.mem)
|
||||
|
||||
property shape_:
|
||||
def __get__(self):
|
||||
return self.vocab.strings.decode_int(self.c.lex.shape, mem=self.doc.mem)
|
||||
return self.vocab.strings.decode_int(self.c.lex.shape, mem=self.mem)
|
||||
|
||||
property prefix_:
|
||||
def __get__(self):
|
||||
|
@ -464,11 +462,11 @@ cdef class Token:
|
|||
|
||||
property lang_:
|
||||
def __get__(self):
|
||||
return self.vocab.strings.decode_int(self.c.lex.lang, mem=self.doc.mem)
|
||||
return self.vocab.strings.decode_int(self.c.lex.lang, mem=self.mem)
|
||||
|
||||
property lemma_:
|
||||
def __get__(self):
|
||||
return self.vocab.strings.decode_int(self.c.lemma, mem=self.doc.mem)
|
||||
return self.vocab.strings.decode_int(self.c.lemma, mem=self.mem)
|
||||
|
||||
property pos_:
|
||||
def __get__(self):
|
||||
|
@ -476,7 +474,7 @@ cdef class Token:
|
|||
|
||||
property tag_:
|
||||
def __get__(self):
|
||||
return self.vocab.strings.decode_int(self.c.tag, mem=self.doc.mem)
|
||||
return self.vocab.strings.decode_int(self.c.tag, mem=self.mem)
|
||||
|
||||
property dep_:
|
||||
def __get__(self):
|
||||
|
|
Loading…
Reference in New Issue
Block a user