mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-15 06:09:01 +03:00
88 lines
2.0 KiB
Cython
88 lines
2.0 KiB
Cython
from __future__ import unicode_literals
|
|
cimport parts_of_speech
|
|
|
|
|
|
# Reset the sense mask of every part-of-speech tag listed below to zero
# before any sense bits are OR-ed in.
cdef int _pos_tag = 0

for _pos_tag in (
    <int>parts_of_speech.NO_TAG,
    <int>parts_of_speech.ADJ,
    <int>parts_of_speech.ADV,
    <int>parts_of_speech.ADP,
    <int>parts_of_speech.CONJ,
    <int>parts_of_speech.DET,
    <int>parts_of_speech.NOUN,
    <int>parts_of_speech.NUM,
    <int>parts_of_speech.PRON,
    <int>parts_of_speech.PRT,
    <int>parts_of_speech.VERB,
    <int>parts_of_speech.X,
    <int>parts_of_speech.PUNCT,
    <int>parts_of_speech.EOL,
):
    POS_SENSES[_pos_tag] = 0
|
|
|
|
|
|
# Build the NOUN and VERB sense masks: each sense id in the ranges below
# sets the bit at that position in the mask of its part of speech.
cdef int _sense = 0

# The shifted operand is cast to flags_t so the shift is performed at the
# width of that type rather than at the width of a bare integer literal.
# NOTE(review): assumes POS_SENSES elements are flags_t and that the sense
# ids mirror the order of STRINGS (which would put V_weather at 40, beyond
# what a 32-bit shift can represent) -- confirm against the .pxd.

# Ids from N_act up to, but not including, V_body go into the NOUN mask.
for _sense in range(N_act, V_body):
    POS_SENSES[<int>parts_of_speech.NOUN] |= (<flags_t>1) << _sense

# Ids from V_body through V_weather (inclusive) go into the VERB mask.
for _sense in range(V_body, V_weather+1):
    POS_SENSES[<int>parts_of_speech.VERB] |= (<flags_t>1) << _sense
|
|
|
|
|
|
# Sense label strings. A label's position in this tuple is the integer id
# that IDS maps it to, so new labels must not be inserted in the middle
# without accounting for every consumer of those ids.
STRINGS = (
    '-NO_SENSE-',
    'N_act',
    'N_animal',
    'N_artifact',
    'N_attribute',
    'N_body',
    'N_cognition',
    'N_communication',
    'N_event',
    'N_feeling',
    'N_food',
    'N_group',
    'N_location',
    'N_motive',
    'N_object',
    'N_person',
    'N_phenomenon',
    'N_plant',
    'N_possession',
    'N_process',
    'N_quantity',
    'N_relation',
    'N_shape',
    'N_state',
    'N_substance',
    'N_time',
    'V_body',
    'V_change',
    'V_cognition',
    'V_communication',
    'V_competition',
    'V_consumption',
    'V_contact',
    'V_creation',
    'V_emotion',
    'V_motion',
    'V_perception',
    'V_possession',
    'V_social',
    'V_stative',
    'V_weather',
)

# Reverse lookup: sense label -> its index in STRINGS.
IDS = {sense_str: i for i, sense_str in enumerate(STRINGS)}
|
|
|
|
|
|
cdef flags_t encode_sense_strs(sense_names) except 0:
    """Pack a collection of sense names into a single bit mask.

    Each name is looked up in IDS and the bit at that index is set in the
    result. A name containing a '.' is first rewritten as its upper-cased
    first character, an underscore, and the text between the first and
    second '.' (so 'noun.act' becomes 'N_act') before the lookup.

    sense_names: a sized iterable of sense name strings.
    Returns: the OR of one bit per name, or only the NO_SENSE bit when
        sense_names is empty.
    Raises: KeyError if a (rewritten) name is not a key of IDS.
    """
    cdef flags_t sense_bits = 0
    cdef flags_t sense_id = 0
    if len(sense_names) == 0:
        return (<flags_t>1) << NO_SENSE
    for sense_str in sense_names:
        if '.' in sense_str:
            sense_str = sense_str[0].upper() + '_' + sense_str.split('.')[1]
        sense_id = IDS[sense_str]
        # Shift a flags_t-wide 1: a bare literal would be shifted at the
        # width of a plain C integer, which can overflow for high ids and
        # could even yield 0, colliding with the `except 0` error sentinel.
        sense_bits |= (<flags_t>1) << sense_id
    return sense_bits
|