* Add encode_sense_strs function

This commit is contained in:
Matthew Honnibal 2015-07-03 05:45:16 +02:00
parent 1be5ab200f
commit 4a60b68a24
2 changed files with 20 additions and 0 deletions

View File

@ -59,5 +59,11 @@ cpdef enum:
N_SENSES
# Array of flags_t values with parts_of_speech.N_UNIV_TAGS entries
# (presumably one sense bit mask per universal POS tag -- confirm against
# the implementation file).
cdef flags_t[<int>parts_of_speech.N_UNIV_TAGS] POS_SENSES
# TODO: Can the error sentinel be defined at compile time instead of being
# hard-coded? The literal 18014398509481984 used below is 1 << 54, i.e. the
# value the commented-out DEF would have produced.
# DEF INVALID_SENSE = 1 << 54
# Declaration only; the literal after `except` is the function's error
# return value.
cdef flags_t encode_sense_strs(sense_names) except 18014398509481984

View File

@ -86,3 +86,17 @@ STRINGS = (
'V_stative',
'V_weather'
)
# Reverse lookup: sense name -> its position in STRINGS.
IDS = {name: index for index, name in enumerate(STRINGS)}
cdef flags_t encode_sense_strs(sense_names) except 18014398509481984:
    """Pack an iterable of sense names into a single bit field.

    Each name is looked up in IDS and the bit at that index is set in the
    result. A name containing a '.' is first rewritten as the upper-cased
    first character, an underscore, and the segment following the first '.'
    (e.g. 'verb.weather' becomes 'V_weather'). The name 'N_Tops' is skipped
    and contributes no bit.

    sense_names: iterable of sense-name strings.
    Returns: a flags_t value with one bit set per encoded sense.
    Raises: KeyError if a (rewritten) name is not a key of IDS. The declared
        error return value 18014398509481984 equals 1 << 54.
    """
    cdef flags_t sense_bits = 0
    cdef flags_t sense_id = 0
    # Shift a flags_t-typed one rather than the literal 1: a bare literal is a
    # C int in the generated code, so shifting it by 31 or more positions
    # overflows before the result is widened to flags_t.
    cdef flags_t one = 1
    for sense_str in sense_names:
        if '.' in sense_str:
            sense_str = sense_str[0].upper() + '_' + sense_str.split('.')[1]
        if sense_str != 'N_Tops':
            sense_id = IDS[sense_str]
            sense_bits |= (one << sense_id)
    return sense_bits