* Fix ugly py_check_flag and py_set_flag functions in Lexeme

This commit is contained in:
Matthew Honnibal 2015-09-15 13:06:18 +10:00
parent 9561d88529
commit 193f127f81
3 changed files with 49 additions and 46 deletions

View File

@@ -26,7 +26,7 @@ cdef class Lexeme:
@staticmethod
cdef inline void set_struct_attr(LexemeC* lex, attr_id_t name, attr_t value) nogil:
if name < (sizeof(flags_t) * 8):
Lexeme.set_flag(lex, name, value)
Lexeme.c_set_flag(lex, name, value)
elif name == ID:
lex.id = value
elif name == LOWER:
@@ -45,7 +45,7 @@ cdef class Lexeme:
@staticmethod
cdef inline attr_t get_struct_attr(const LexemeC* lex, attr_id_t feat_name) nogil:
if feat_name < (sizeof(flags_t) * 8):
if Lexeme.check_flag(lex, feat_name):
if Lexeme.c_check_flag(lex, feat_name):
return 1
else:
return 0
@@ -71,12 +71,15 @@ cdef class Lexeme:
return 0
@staticmethod
cdef inline bint check_flag(const LexemeC* lexeme, attr_id_t flag_id) nogil:
cdef inline bint c_check_flag(const LexemeC* lexeme, attr_id_t flag_id) nogil:
cdef flags_t one = 1
return lexeme.flags & (one << flag_id)
if lexeme.flags & (one << flag_id):
return True
else:
return False
@staticmethod
cdef inline bint set_flag(LexemeC* lex, attr_id_t flag_id, int value) nogil:
cdef inline bint c_set_flag(LexemeC* lex, attr_id_t flag_id, bint value) nogil:
cdef flags_t one = 1
if value:
lex.flags |= one << flag_id

View File

@@ -36,11 +36,11 @@ cdef class Lexeme:
self.c = <LexemeC*><void*>vocab.get_by_orth(vocab.mem, orth)
assert self.c.orth == orth
def py_set_flag(self, attr_id_t flag_id):
Lexeme.set_flag(self.c, flag_id, True)
def set_flag(self, attr_id_t flag_id, bint value):
Lexeme.c_set_flag(self.c, flag_id, value)
def py_check_flag(self, attr_id_t flag_id):
return True if Lexeme.check_flag(self.c, flag_id) else False
def check_flag(self, attr_id_t flag_id):
return True if Lexeme.c_check_flag(self.c, flag_id) else False
def similarity(self, other):
return numpy.dot(self.vector, other.vector) / (self.vector_norm * other.vector_norm)
@@ -119,49 +119,49 @@ cdef class Lexeme:
def __set__(self, flags_t x): self.c.flags = x
property is_oov:
def __get__(self): return Lexeme.check_flag(self.c, IS_OOV)
def __set__(self, bint x): Lexeme.set_flag(self.c, IS_OOV, x)
def __get__(self): return Lexeme.c_check_flag(self.c, IS_OOV)
def __set__(self, bint x): Lexeme.c_set_flag(self.c, IS_OOV, x)
property is_stop:
def __get__(self): return Lexeme.check_flag(self.c, IS_STOP)
def __set__(self, bint x): Lexeme.set_flag(self.c, IS_STOP, x)
def __get__(self): return Lexeme.c_check_flag(self.c, IS_STOP)
def __set__(self, bint x): Lexeme.c_set_flag(self.c, IS_STOP, x)
property is_alpha:
def __get__(self): return Lexeme.check_flag(self.c, IS_ALPHA)
def __set__(self, bint x): Lexeme.set_flag(self.c, IS_ALPHA, x)
def __get__(self): return Lexeme.c_check_flag(self.c, IS_ALPHA)
def __set__(self, bint x): Lexeme.c_set_flag(self.c, IS_ALPHA, x)
property is_ascii:
def __get__(self): return Lexeme.check_flag(self.c, IS_ASCII)
def __set__(self, bint x): Lexeme.set_flag(self.c, IS_ASCII, x)
def __get__(self): return Lexeme.c_check_flag(self.c, IS_ASCII)
def __set__(self, bint x): Lexeme.c_set_flag(self.c, IS_ASCII, x)
property is_digit:
def __get__(self): return Lexeme.check_flag(self.c, IS_DIGIT)
def __set__(self, bint x): Lexeme.set_flag(self.c, IS_DIGIT, x)
def __get__(self): return Lexeme.c_check_flag(self.c, IS_DIGIT)
def __set__(self, bint x): Lexeme.c_set_flag(self.c, IS_DIGIT, x)
property is_lower:
def __get__(self): return Lexeme.check_flag(self.c, IS_LOWER)
def __set__(self, bint x): Lexeme.set_flag(self.c, IS_LOWER, x)
def __get__(self): return Lexeme.c_check_flag(self.c, IS_LOWER)
def __set__(self, bint x): Lexeme.c_set_flag(self.c, IS_LOWER, x)
property is_title:
def __get__(self): return Lexeme.check_flag(self.c, IS_TITLE)
def __set__(self, bint x): Lexeme.set_flag(self.c, IS_TITLE, x)
def __get__(self): return Lexeme.c_check_flag(self.c, IS_TITLE)
def __set__(self, bint x): Lexeme.c_set_flag(self.c, IS_TITLE, x)
property is_punct:
def __get__(self): return Lexeme.check_flag(self.c, IS_PUNCT)
def __set__(self, bint x): Lexeme.set_flag(self.c, IS_PUNCT, x)
def __get__(self): return Lexeme.c_check_flag(self.c, IS_PUNCT)
def __set__(self, bint x): Lexeme.c_set_flag(self.c, IS_PUNCT, x)
property is_space:
def __get__(self): return Lexeme.check_flag(self.c, IS_SPACE)
def __set__(self, bint x): Lexeme.set_flag(self.c, IS_SPACE, x)
def __get__(self): return Lexeme.c_check_flag(self.c, IS_SPACE)
def __set__(self, bint x): Lexeme.c_set_flag(self.c, IS_SPACE, x)
property like_url:
def __get__(self): return Lexeme.check_flag(self.c, LIKE_URL)
def __set__(self, bint x): Lexeme.set_flag(self.c, LIKE_URL, x)
def __get__(self): return Lexeme.c_check_flag(self.c, LIKE_URL)
def __set__(self, bint x): Lexeme.c_set_flag(self.c, LIKE_URL, x)
property like_num:
def __get__(self): return Lexeme.check_flag(self.c, LIKE_NUM)
def __set__(self, bint x): Lexeme.set_flag(self.c, LIKE_NUM, x)
def __get__(self): return Lexeme.c_check_flag(self.c, LIKE_NUM)
def __set__(self, bint x): Lexeme.c_set_flag(self.c, LIKE_NUM, x)
property like_email:
def __get__(self): return Lexeme.check_flag(self.c, LIKE_EMAIL)
def __set__(self, bint x): Lexeme.set_flag(self.c, LIKE_EMAIL, x)
def __get__(self): return Lexeme.c_check_flag(self.c, LIKE_EMAIL)
def __set__(self, bint x): Lexeme.c_set_flag(self.c, LIKE_EMAIL, x)

View File

@@ -44,7 +44,7 @@ cdef class Token:
return self.string
cpdef bint check_flag(self, attr_id_t flag_id) except -1:
return Lexeme.check_flag(self.c.lex, flag_id)
return Lexeme.c_check_flag(self.c.lex, flag_id)
def nbor(self, int i=1):
return self.doc[self.i+i]
@@ -311,40 +311,40 @@ cdef class Token:
return self.vocab.strings[self.c.dep]
property is_oov:
def __get__(self): return Lexeme.check_flag(self.c.lex, IS_OOV)
def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_OOV)
property is_stop:
def __get__(self): return Lexeme.check_flag(self.c.lex, IS_STOP)
def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_STOP)
property is_alpha:
def __get__(self): return Lexeme.check_flag(self.c.lex, IS_ALPHA)
def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_ALPHA)
property is_ascii:
def __get__(self): return Lexeme.check_flag(self.c.lex, IS_ASCII)
def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_ASCII)
property is_digit:
def __get__(self): return Lexeme.check_flag(self.c.lex, IS_DIGIT)
def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_DIGIT)
property is_lower:
def __get__(self): return Lexeme.check_flag(self.c.lex, IS_LOWER)
def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_LOWER)
property is_title:
def __get__(self): return Lexeme.check_flag(self.c.lex, IS_TITLE)
def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_TITLE)
property is_punct:
def __get__(self): return Lexeme.check_flag(self.c.lex, IS_PUNCT)
def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_PUNCT)
property is_space:
def __get__(self): return Lexeme.check_flag(self.c.lex, IS_SPACE)
def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_SPACE)
property like_url:
def __get__(self): return Lexeme.check_flag(self.c.lex, LIKE_URL)
def __get__(self): return Lexeme.c_check_flag(self.c.lex, LIKE_URL)
property like_num:
def __get__(self): return Lexeme.check_flag(self.c.lex, LIKE_NUM)
def __get__(self): return Lexeme.c_check_flag(self.c.lex, LIKE_NUM)
property like_email:
def __get__(self): return Lexeme.check_flag(self.c.lex, LIKE_EMAIL)
def __get__(self): return Lexeme.c_check_flag(self.c.lex, LIKE_EMAIL)
_pos_id_to_string = {id_: string for string, id_ in UNIV_POS_NAMES.items()}