From 4a60b68a2461932c60b066b9cf713f0f73c60dc3 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Fri, 3 Jul 2015 05:45:16 +0200
Subject: [PATCH] * Add encode_sense_strs function

---
 spacy/senses.pxd |  6 ++++++
 spacy/senses.pyx | 30 ++++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+)

diff --git a/spacy/senses.pxd b/spacy/senses.pxd
index 282f5f590..392a92e05 100644
--- a/spacy/senses.pxd
+++ b/spacy/senses.pxd
@@ -59,5 +59,11 @@ cpdef enum:
 
     N_SENSES
 
+
+
 cdef flags_t[parts_of_speech.N_UNIV_TAGS] POS_SENSES
 
+
+# Can we not define this compile time? Have to hard code? :(
+# DEF INVALID_SENSE = 1 << 54
+cdef flags_t encode_sense_strs(sense_names) except 18014398509481984
diff --git a/spacy/senses.pyx b/spacy/senses.pyx
index 99d193774..de6e19300 100644
--- a/spacy/senses.pyx
+++ b/spacy/senses.pyx
@@ -86,3 +86,33 @@ STRINGS = (
     'V_stative',
     'V_weather'
 )
+
+# Map each sense name to its position in STRINGS; that position is the bit
+# index used by encode_sense_strs.
+IDS = dict((sense_str, i) for i, sense_str in enumerate(STRINGS))
+
+
+cdef flags_t encode_sense_strs(sense_names) except 18014398509481984:
+    """Pack an iterable of sense names into a bit field.
+
+    Names containing a '.' are first rewritten to the form used as IDS keys:
+    the upper-cased first character, an underscore, and the text between the
+    first and second '.' (e.g. 'verb.weather' -> 'V_weather'). 'N_Tops' is
+    skipped. Each remaining name sets the bit at position IDS[name].
+
+    Raises KeyError for a name that is not in IDS. The error return value
+    18014398509481984 is 1 << 54.
+    """
+    cdef flags_t sense_bits = 0
+    cdef flags_t sense_id = 0
+    # A bare literal 1 is a C int, so shifting it past the width of int
+    # overflows; shift a flags_t-typed one instead.
+    cdef flags_t one = 1
+    for sense_str in sense_names:
+        if '.' in sense_str:
+            sense_str = sense_str[0].upper() + '_' + sense_str.split('.')[1]
+        if sense_str == 'N_Tops':
+            continue
+        sense_id = IDS[sense_str]
+        sense_bits |= one << sense_id
+    return sense_bits