mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 10:16:27 +03:00
* Add encode_sense_strs function
This commit is contained in:
parent
1be5ab200f
commit
4a60b68a24
|
@ -59,5 +59,11 @@ cpdef enum:
|
||||||
N_SENSES
|
N_SENSES
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
cdef flags_t[<int>parts_of_speech.N_UNIV_TAGS] POS_SENSES
|
cdef flags_t[<int>parts_of_speech.N_UNIV_TAGS] POS_SENSES
|
||||||
|
|
||||||
|
|
||||||
|
# Can we not define this at compile time? Do we have to hard-code it? :(
|
||||||
|
# DEF INVALID_SENSE = 1 << 54
|
||||||
|
# The error-return value 18014398509481984 equals 1 << 54, i.e. the
# INVALID_SENSE value from the commented-out DEF above, written as a literal.
cdef flags_t encode_sense_strs(sense_names) except 18014398509481984
|
||||||
|
|
|
@ -86,3 +86,17 @@ STRINGS = (
|
||||||
'V_stative',
|
'V_stative',
|
||||||
'V_weather'
|
'V_weather'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Reverse lookup: sense name -> its position in STRINGS.
IDS = {name: index for index, name in enumerate(STRINGS)}
|
||||||
|
|
||||||
|
|
||||||
|
cdef flags_t encode_sense_strs(sense_names) except 18014398509481984:
    """Pack an iterable of sense names into a single bit-field.

    Each name is looked up in IDS and the bit at that index is set in the
    result. A name containing a '.' is first rewritten as its upper-cased
    first character, an underscore, and the text between the first and
    second '.' (e.g. 'noun.animal' -> 'N_animal'). The name 'N_Tops' is
    skipped and contributes no bit.

    sense_names: iterable of str.
    Returns: flags_t with one bit set per recognised sense name; 0 if
        sense_names is empty.
    Raises: KeyError if a name (after rewriting) is not a key of IDS. The
        declared error-return value 18014398509481984 (1 << 54) is what
        signals the exception to C callers.
    """
    cdef flags_t sense_bits = 0
    cdef flags_t sense_id = 0
    # The value being shifted must itself be flags_t-wide. A bare literal `1`
    # is a C int, so `1 << sense_id` would overflow for ids >= 31 before the
    # result is widened. (flags_t is presumably 64-bit, given that the error
    # sentinel is 1 << 54 -- confirm against its typedef.)
    cdef flags_t one = 1
    for sense_str in sense_names:
        if '.' in sense_str:
            sense_str = sense_str[0].upper() + '_' + sense_str.split('.')[1]
        # NOTE(review): the bit-set is assumed to be guarded by this check
        # together with the lookup -- confirm against the original layout.
        if sense_str != 'N_Tops':
            sense_id = IDS[sense_str]
            sense_bits |= one << sense_id
    return sense_bits
|
||||||
|
|
Loading…
Reference in New Issue
Block a user