mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 17:24:41 +03:00
Adjust lexeme sizing for attr_t being 64 bit
This commit is contained in:
parent
a5606c3eda
commit
f51e6a6c16
|
@ -27,7 +27,7 @@ cdef class Lexeme:
|
|||
cdef inline SerializedLexemeC c_to_bytes(const LexemeC* lex) nogil:
|
||||
cdef SerializedLexemeC lex_data
|
||||
buff = <const unsigned char*>&lex.flags
|
||||
end = <const unsigned char*>&lex.l2_norm + sizeof(lex.l2_norm)
|
||||
end = <const unsigned char*>&lex.sentiment + sizeof(lex.sentiment)
|
||||
for i in range(sizeof(lex_data.data)):
|
||||
lex_data.data[i] = buff[i]
|
||||
return lex_data
|
||||
|
|
|
@ -35,11 +35,11 @@ cdef class Lexeme:
|
|||
tag, dependency parse, or lemma (lemmatization depends on the part-of-speech
|
||||
tag).
|
||||
"""
|
||||
def __init__(self, Vocab vocab, int orth):
|
||||
def __init__(self, Vocab vocab, attr_t orth):
|
||||
"""Create a Lexeme object.
|
||||
|
||||
vocab (Vocab): The parent vocabulary
|
||||
orth (int): The orth id of the lexeme.
|
||||
orth (uint64): The orth id of the lexeme.
|
||||
Returns (Lexeme): The newly constructd object.
|
||||
"""
|
||||
self.vocab = vocab
|
||||
|
@ -51,7 +51,7 @@ cdef class Lexeme:
|
|||
if isinstance(other, Lexeme):
|
||||
a = self.orth
|
||||
b = other.orth
|
||||
elif isinstance(other, int):
|
||||
elif isinstance(other, long):
|
||||
a = self.orth
|
||||
b = other
|
||||
elif isinstance(other, str):
|
||||
|
@ -109,7 +109,7 @@ cdef class Lexeme:
|
|||
def to_bytes(self):
|
||||
lex_data = Lexeme.c_to_bytes(self.c)
|
||||
start = <const char*>&self.c.flags
|
||||
end = <const char*>&self.c.l2_norm + sizeof(self.c.l2_norm)
|
||||
end = <const char*>&self.c.sentiment + sizeof(self.c.sentiment)
|
||||
assert (end-start) == sizeof(lex_data.data), (end-start, sizeof(lex_data.data))
|
||||
byte_string = b'\0' * sizeof(lex_data.data)
|
||||
byte_chars = <char*>byte_string
|
||||
|
@ -192,31 +192,31 @@ cdef class Lexeme:
|
|||
|
||||
property lower:
|
||||
def __get__(self): return self.c.lower
|
||||
def __set__(self, int x): self.c.lower = x
|
||||
def __set__(self, attr_t x): self.c.lower = x
|
||||
|
||||
property norm:
|
||||
def __get__(self): return self.c.norm
|
||||
def __set__(self, int x): self.c.norm = x
|
||||
def __set__(self, attr_t x): self.c.norm = x
|
||||
|
||||
property shape:
|
||||
def __get__(self): return self.c.shape
|
||||
def __set__(self, int x): self.c.shape = x
|
||||
def __set__(self, attr_t x): self.c.shape = x
|
||||
|
||||
property prefix:
|
||||
def __get__(self): return self.c.prefix
|
||||
def __set__(self, int x): self.c.prefix = x
|
||||
def __set__(self, attr_t x): self.c.prefix = x
|
||||
|
||||
property suffix:
|
||||
def __get__(self): return self.c.suffix
|
||||
def __set__(self, int x): self.c.suffix = x
|
||||
def __set__(self, attr_t x): self.c.suffix = x
|
||||
|
||||
property cluster:
|
||||
def __get__(self): return self.c.cluster
|
||||
def __set__(self, int x): self.c.cluster = x
|
||||
def __set__(self, attr_t x): self.c.cluster = x
|
||||
|
||||
property lang:
|
||||
def __get__(self): return self.c.lang
|
||||
def __set__(self, int x): self.c.lang = x
|
||||
def __set__(self, attr_t x): self.c.lang = x
|
||||
|
||||
property prob:
|
||||
def __get__(self): return self.c.prob
|
||||
|
@ -252,7 +252,7 @@ cdef class Lexeme:
|
|||
|
||||
property is_oov:
|
||||
def __get__(self): return Lexeme.c_check_flag(self.c, IS_OOV)
|
||||
def __set__(self, bint x): Lexeme.c_set_flag(self.c, IS_OOV, x)
|
||||
def __set__(self, attr_t x): Lexeme.c_set_flag(self.c, IS_OOV, x)
|
||||
|
||||
property is_stop:
|
||||
def __get__(self): return Lexeme.c_check_flag(self.c, IS_STOP)
|
||||
|
|
|
@ -27,7 +27,7 @@ cdef struct LexemeC:
|
|||
|
||||
|
||||
cdef struct SerializedLexemeC:
|
||||
unsigned char[4*13 + 8] data
|
||||
unsigned char[8 + 8*10 + 4 + 4] data
|
||||
# sizeof(flags_t) # flags
|
||||
# + sizeof(attr_t) # lang
|
||||
# + sizeof(attr_t) # id
|
||||
|
@ -58,10 +58,10 @@ cdef struct TokenC:
|
|||
bint spacy
|
||||
int tag
|
||||
int idx
|
||||
int lemma
|
||||
attr_t lemma
|
||||
int sense
|
||||
int head
|
||||
int dep
|
||||
attr_t dep
|
||||
bint sent_start
|
||||
|
||||
uint32_t l_kids
|
||||
|
|
Loading…
Reference in New Issue
Block a user