mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-14 05:37:03 +03:00
* Add encode_sense_strs function
This commit is contained in:
parent
1be5ab200f
commit
4a60b68a24
|
@ -59,5 +59,11 @@ cpdef enum:
|
|||
N_SENSES
|
||||
|
||||
|
||||
|
||||
|
||||
# Table of flags_t values with one slot per universal part-of-speech tag
# (sized by parts_of_speech.N_UNIV_TAGS).
# NOTE(review): presumably each slot is the bit mask of senses allowed for
# that tag -- confirm against the code that fills it.
cdef flags_t[<int>parts_of_speech.N_UNIV_TAGS] POS_SENSES
|
||||
|
||||
|
||||
# Error sentinel for encode_sense_strs: the literal 18014398509481984 in the
# `except` clause below equals 1 << 54. It is hard-coded for now.
# TODO: can this be defined at compile time (see the DEF below) instead?
|
||||
# DEF INVALID_SENSE = 1 << 54
|
||||
# Encode an iterable of sense names as a flags_t bit field. A return value of
# 18014398509481984 signals that an exception was raised.
cdef flags_t encode_sense_strs(sense_names) except 18014398509481984
|
||||
|
|
|
@ -86,3 +86,17 @@ STRINGS = (
|
|||
'V_stative',
|
||||
'V_weather'
|
||||
)
|
||||
|
||||
# Reverse lookup: sense name -> its position in STRINGS.
IDS = {name: index for index, name in enumerate(STRINGS)}
|
||||
|
||||
|
||||
cdef flags_t encode_sense_strs(sense_names) except 18014398509481984:
    """Pack a collection of sense names into a single flags_t bit field.

    Each name is looked up in IDS and the bit at that index is set in the
    result. A name containing '.' is first rewritten as
    ``<upper-cased first character>_<segment after the first '.'>``
    (e.g. 'noun.animal' -> 'N_animal'). The name 'N_Tops' is skipped and
    contributes no bit.

    sense_names: iterable of sense-name strings.
    Returns: flags_t with one bit set per encoded sense (0 if none).
    Raises: KeyError if a (rewritten) name is not a key of IDS. The error is
        reported to C callers through the sentinel 18014398509481984
        (1 << 54).
    """
    # A bare literal `1` is a C int, so `1 << sense_id` would overflow once
    # sense_id reaches the width of int. Shifting a flags_t-typed one keeps
    # the arithmetic at the full width of the bit field.
    cdef flags_t one = 1
    cdef flags_t sense_bits = 0
    cdef flags_t sense_id = 0
    for sense_str in sense_names:
        if '.' in sense_str:
            sense_str = sense_str[0].upper() + '_' + sense_str.split('.')[1]
        if sense_str != 'N_Tops':
            sense_id = IDS[sense_str]
            sense_bits |= (one << sense_id)
    return sense_bits
|
||||
|
|
Loading…
Reference in New Issue
Block a user