# Supersense flag tables: per-part-of-speech masks of permitted senses, the
# sense-name <-> id mapping, and a helper that packs sense names into a
# bit-flag value.
from __future__ import unicode_literals
cimport parts_of_speech


# Start every part-of-speech mask empty; only NOUN and VERB receive sense
# bits below.
POS_SENSES[parts_of_speech.NO_TAG] = 0
POS_SENSES[parts_of_speech.ADJ] = 0
POS_SENSES[parts_of_speech.ADV] = 0
POS_SENSES[parts_of_speech.ADP] = 0
POS_SENSES[parts_of_speech.CONJ] = 0
POS_SENSES[parts_of_speech.DET] = 0
POS_SENSES[parts_of_speech.NOUN] = 0
POS_SENSES[parts_of_speech.NUM] = 0
POS_SENSES[parts_of_speech.PRON] = 0
POS_SENSES[parts_of_speech.PRT] = 0
POS_SENSES[parts_of_speech.VERB] = 0
POS_SENSES[parts_of_speech.X] = 0
POS_SENSES[parts_of_speech.PUNCT] = 0
POS_SENSES[parts_of_speech.EOL] = 0


cdef int _sense = 0

# NOTE: the shifts below are applied to a flags_t-typed one rather than to a
# bare literal, so they are carried out at the width of the flag type.
# STRINGS holds 41 entries, so sense ids run up to 40 -- more bits than a
# 32-bit shift can represent.

# Sense ids from N_act up to (but excluding) V_body are enabled for nouns.
for _sense in range(N_act, V_body):
    POS_SENSES[parts_of_speech.NOUN] |= (<flags_t>1) << _sense

# Sense ids from V_body through V_weather (inclusive) are enabled for verbs.
for _sense in range(V_body, V_weather+1):
    POS_SENSES[parts_of_speech.VERB] |= (<flags_t>1) << _sense


# Sense names indexed by sense id.
# NOTE(review): the order presumably mirrors the sense enum declared in the
# companion .pxd -- confirm before adding or reordering entries.
STRINGS = (
    '-NO_SENSE-',
    'N_act',
    'N_animal',
    'N_artifact',
    'N_attribute',
    'N_body',
    'N_cognition',
    'N_communication',
    'N_event',
    'N_feeling',
    'N_food',
    'N_group',
    'N_location',
    'N_motive',
    'N_object',
    'N_person',
    'N_phenomenon',
    'N_plant',
    'N_possession',
    'N_process',
    'N_quantity',
    'N_relation',
    'N_shape',
    'N_state',
    'N_substance',
    'N_time',
    'V_body',
    'V_change',
    'V_cognition',
    'V_communication',
    'V_competition',
    'V_consumption',
    'V_contact',
    'V_creation',
    'V_emotion',
    'V_motion',
    'V_perception',
    'V_possession',
    'V_social',
    'V_stative',
    'V_weather',
)

# Reverse lookup: sense name -> position in STRINGS.
IDS = dict((sense_str, i) for i, sense_str in enumerate(STRINGS))


cdef flags_t encode_sense_strs(sense_names) except 0:
    """Pack a collection of sense names into a single bit-flag value.

    Each name sets the bit at its index in STRINGS. A name containing a '.'
    (e.g. 'noun.act') is first rewritten as the upper-cased first character,
    an underscore, and the text between the first and second dot ('N_act').

    sense_names: A sized iterable of sense-name strings.
    Returns: The OR of one bit per name; if sense_names is empty, only the
        NO_SENSE bit is set. The result is therefore never 0, which is the
        value reserved by `except 0` for signalling an exception.
    Raises: KeyError if a (rewritten) name is not a key of IDS.
    """
    cdef flags_t sense_bits = 0
    cdef flags_t sense_id = 0
    if len(sense_names) == 0:
        return sense_bits | ((<flags_t>1) << NO_SENSE)
    for sense_str in sense_names:
        name = sense_str
        if '.' in name:
            name = name[0].upper() + '_' + name.split('.')[1]
        sense_id = IDS[name]
        # Shift a flags_t-typed one: ids in IDS go up to 40, so the shift must
        # happen at the width of the flag type, not of a bare literal.
        sense_bits |= (<flags_t>1) << sense_id
    return sense_bits