mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-15 06:09:01 +03:00
70 lines
1.7 KiB
Cython
70 lines
1.7 KiB
Cython
from __future__ import unicode_literals
|
|
cimport parts_of_speech
|
|
|
|
|
|
# Sense inventory table, one row per sense with three whitespace-separated
# columns: <number> <sense name> <POS code>.
# Only the sense name (second column) is read below; a sense's ID is its row
# position in this table, not the value in the first column.
# NOTE(review): rows 01 (A_pert) and 44 (A_ppl) share POS code 3 with J_all
# but carry the A_ prefix, while encode_sense_strs rewrites 'adj' to 'J' --
# confirm these names are what callers actually pass.
lexnames_str = """
-1 NO_SENSE -1
00 J_all 3
01 A_pert 3
02 A_all 4
03 N_Tops 1
04 N_act 1
05 N_animal 1
06 N_artifact 1
07 N_attribute 1
08 N_body 1
09 N_cognition 1
10 N_communication 1
11 N_event 1
12 N_feeling 1
13 N_food 1
14 N_group 1
15 N_location 1
16 N_motive 1
17 N_object 1
18 N_person 1
19 N_phenomenon 1
20 N_plant 1
21 N_possession 1
22 N_process 1
23 N_quantity 1
24 N_relation 1
25 N_shape 1
26 N_state 1
27 N_substance 1
28 N_time 1
29 V_body 2
30 V_change 2
31 V_cognition 2
32 V_communication 2
33 V_competition 2
34 V_consumption 2
35 V_contact 2
36 V_creation 2
37 V_emotion 2
38 V_motion 2
39 V_perception 2
40 V_possession 2
41 V_social 2
42 V_stative 2
43 V_weather 2
44 A_ppl 3
""".strip()


# Sense names in table order; the index of a name is its sense ID.
STRINGS = tuple(row.split()[1] for row in lexnames_str.splitlines())


# Reverse lookup: sense name -> sense ID (its index in STRINGS).
IDS = {sense_str: i for i, sense_str in enumerate(STRINGS)}
|
|
|
|
|
|
cdef flags_t encode_sense_strs(sense_names) except 0:
    """Pack a collection of sense names into a bit set.

    Each name has 'noun'/'verb'/'adj'/'adv' rewritten to 'N'/'V'/'J'/'A',
    is looked up in IDS, and sets the bit at that ID. An empty collection
    yields only the NO_SENSE bit.

    sense_names: sized iterable of sense-name strings.
    Returns: flags_t with one bit set per distinct sense. A successful call
        always sets at least one bit, so 0 is free to act as the error
        sentinel declared by `except 0`.
    Raises: KeyError if a rewritten name is not a key of IDS.
    """
    # Typed one, so the shifts below happen at flags_t width. A bare `1`
    # literal is a C int, which is too narrow for the higher sense IDs
    # (IDS assigns IDs up to 45).
    # NOTE(review): assumes flags_t is at least 64 bits wide -- confirm
    # against its declaration.
    cdef flags_t one = 1
    cdef flags_t sense_bits = 0
    cdef flags_t sense_id = 0
    if len(sense_names) == 0:
        return sense_bits | (one << NO_SENSE)
    for sense_str in sense_names:
        # Normalise the part-of-speech prefix to the single-letter form
        # used by the keys of IDS.
        sense_str = sense_str.replace('noun', 'N').replace('verb', 'V')
        sense_str = sense_str.replace('adj', 'J').replace('adv', 'A')
        sense_id = IDS[sense_str]
        sense_bits |= (one << sense_id)
    return sense_bits
|