mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 18:06:29 +03:00
Adjust lexeme sizing for attr_t being 64 bit
This commit is contained in:
parent
a5606c3eda
commit
f51e6a6c16
|
@ -27,7 +27,7 @@ cdef class Lexeme:
|
||||||
cdef inline SerializedLexemeC c_to_bytes(const LexemeC* lex) nogil:
|
cdef inline SerializedLexemeC c_to_bytes(const LexemeC* lex) nogil:
|
||||||
cdef SerializedLexemeC lex_data
|
cdef SerializedLexemeC lex_data
|
||||||
buff = <const unsigned char*>&lex.flags
|
buff = <const unsigned char*>&lex.flags
|
||||||
end = <const unsigned char*>&lex.l2_norm + sizeof(lex.l2_norm)
|
end = <const unsigned char*>&lex.sentiment + sizeof(lex.sentiment)
|
||||||
for i in range(sizeof(lex_data.data)):
|
for i in range(sizeof(lex_data.data)):
|
||||||
lex_data.data[i] = buff[i]
|
lex_data.data[i] = buff[i]
|
||||||
return lex_data
|
return lex_data
|
||||||
|
|
|
@ -35,11 +35,11 @@ cdef class Lexeme:
|
||||||
tag, dependency parse, or lemma (lemmatization depends on the part-of-speech
|
tag, dependency parse, or lemma (lemmatization depends on the part-of-speech
|
||||||
tag).
|
tag).
|
||||||
"""
|
"""
|
||||||
def __init__(self, Vocab vocab, int orth):
|
def __init__(self, Vocab vocab, attr_t orth):
|
||||||
"""Create a Lexeme object.
|
"""Create a Lexeme object.
|
||||||
|
|
||||||
vocab (Vocab): The parent vocabulary
|
vocab (Vocab): The parent vocabulary
|
||||||
orth (int): The orth id of the lexeme.
|
orth (uint64): The orth id of the lexeme.
|
||||||
Returns (Lexeme): The newly constructd object.
|
Returns (Lexeme): The newly constructd object.
|
||||||
"""
|
"""
|
||||||
self.vocab = vocab
|
self.vocab = vocab
|
||||||
|
@ -51,7 +51,7 @@ cdef class Lexeme:
|
||||||
if isinstance(other, Lexeme):
|
if isinstance(other, Lexeme):
|
||||||
a = self.orth
|
a = self.orth
|
||||||
b = other.orth
|
b = other.orth
|
||||||
elif isinstance(other, int):
|
elif isinstance(other, long):
|
||||||
a = self.orth
|
a = self.orth
|
||||||
b = other
|
b = other
|
||||||
elif isinstance(other, str):
|
elif isinstance(other, str):
|
||||||
|
@ -109,7 +109,7 @@ cdef class Lexeme:
|
||||||
def to_bytes(self):
|
def to_bytes(self):
|
||||||
lex_data = Lexeme.c_to_bytes(self.c)
|
lex_data = Lexeme.c_to_bytes(self.c)
|
||||||
start = <const char*>&self.c.flags
|
start = <const char*>&self.c.flags
|
||||||
end = <const char*>&self.c.l2_norm + sizeof(self.c.l2_norm)
|
end = <const char*>&self.c.sentiment + sizeof(self.c.sentiment)
|
||||||
assert (end-start) == sizeof(lex_data.data), (end-start, sizeof(lex_data.data))
|
assert (end-start) == sizeof(lex_data.data), (end-start, sizeof(lex_data.data))
|
||||||
byte_string = b'\0' * sizeof(lex_data.data)
|
byte_string = b'\0' * sizeof(lex_data.data)
|
||||||
byte_chars = <char*>byte_string
|
byte_chars = <char*>byte_string
|
||||||
|
@ -192,31 +192,31 @@ cdef class Lexeme:
|
||||||
|
|
||||||
property lower:
|
property lower:
|
||||||
def __get__(self): return self.c.lower
|
def __get__(self): return self.c.lower
|
||||||
def __set__(self, int x): self.c.lower = x
|
def __set__(self, attr_t x): self.c.lower = x
|
||||||
|
|
||||||
property norm:
|
property norm:
|
||||||
def __get__(self): return self.c.norm
|
def __get__(self): return self.c.norm
|
||||||
def __set__(self, int x): self.c.norm = x
|
def __set__(self, attr_t x): self.c.norm = x
|
||||||
|
|
||||||
property shape:
|
property shape:
|
||||||
def __get__(self): return self.c.shape
|
def __get__(self): return self.c.shape
|
||||||
def __set__(self, int x): self.c.shape = x
|
def __set__(self, attr_t x): self.c.shape = x
|
||||||
|
|
||||||
property prefix:
|
property prefix:
|
||||||
def __get__(self): return self.c.prefix
|
def __get__(self): return self.c.prefix
|
||||||
def __set__(self, int x): self.c.prefix = x
|
def __set__(self, attr_t x): self.c.prefix = x
|
||||||
|
|
||||||
property suffix:
|
property suffix:
|
||||||
def __get__(self): return self.c.suffix
|
def __get__(self): return self.c.suffix
|
||||||
def __set__(self, int x): self.c.suffix = x
|
def __set__(self, attr_t x): self.c.suffix = x
|
||||||
|
|
||||||
property cluster:
|
property cluster:
|
||||||
def __get__(self): return self.c.cluster
|
def __get__(self): return self.c.cluster
|
||||||
def __set__(self, int x): self.c.cluster = x
|
def __set__(self, attr_t x): self.c.cluster = x
|
||||||
|
|
||||||
property lang:
|
property lang:
|
||||||
def __get__(self): return self.c.lang
|
def __get__(self): return self.c.lang
|
||||||
def __set__(self, int x): self.c.lang = x
|
def __set__(self, attr_t x): self.c.lang = x
|
||||||
|
|
||||||
property prob:
|
property prob:
|
||||||
def __get__(self): return self.c.prob
|
def __get__(self): return self.c.prob
|
||||||
|
@ -252,7 +252,7 @@ cdef class Lexeme:
|
||||||
|
|
||||||
property is_oov:
|
property is_oov:
|
||||||
def __get__(self): return Lexeme.c_check_flag(self.c, IS_OOV)
|
def __get__(self): return Lexeme.c_check_flag(self.c, IS_OOV)
|
||||||
def __set__(self, bint x): Lexeme.c_set_flag(self.c, IS_OOV, x)
|
def __set__(self, attr_t x): Lexeme.c_set_flag(self.c, IS_OOV, x)
|
||||||
|
|
||||||
property is_stop:
|
property is_stop:
|
||||||
def __get__(self): return Lexeme.c_check_flag(self.c, IS_STOP)
|
def __get__(self): return Lexeme.c_check_flag(self.c, IS_STOP)
|
||||||
|
|
|
@ -27,7 +27,7 @@ cdef struct LexemeC:
|
||||||
|
|
||||||
|
|
||||||
cdef struct SerializedLexemeC:
|
cdef struct SerializedLexemeC:
|
||||||
unsigned char[4*13 + 8] data
|
unsigned char[8 + 8*10 + 4 + 4] data
|
||||||
# sizeof(flags_t) # flags
|
# sizeof(flags_t) # flags
|
||||||
# + sizeof(attr_t) # lang
|
# + sizeof(attr_t) # lang
|
||||||
# + sizeof(attr_t) # id
|
# + sizeof(attr_t) # id
|
||||||
|
@ -58,10 +58,10 @@ cdef struct TokenC:
|
||||||
bint spacy
|
bint spacy
|
||||||
int tag
|
int tag
|
||||||
int idx
|
int idx
|
||||||
int lemma
|
attr_t lemma
|
||||||
int sense
|
int sense
|
||||||
int head
|
int head
|
||||||
int dep
|
attr_t dep
|
||||||
bint sent_start
|
bint sent_start
|
||||||
|
|
||||||
uint32_t l_kids
|
uint32_t l_kids
|
||||||
|
|
Loading…
Reference in New Issue
Block a user