* Remove the adjective senses from the supersense list. Their presence seems to be associated with the current memory errors

This commit is contained in:
Matthew Honnibal 2015-07-03 09:24:45 +02:00
parent 12dd4f745a
commit 68f174b235
2 changed files with 8 additions and 39 deletions

View File

@ -3,19 +3,7 @@ cimport parts_of_speech
from .typedefs cimport flags_t from .typedefs cimport flags_t
cpdef enum: cpdef enum:
A_behavior NO_SENSE
A_body
A_feeling
A_mind
A_motion
A_perception
A_quantity
A_relation
A_social
A_spatial
A_substance
A_time
A_weather
N_act N_act
N_animal N_animal
N_artifact N_artifact
@ -59,11 +47,7 @@ cpdef enum:
N_SENSES N_SENSES
cdef flags_t[<int>parts_of_speech.N_UNIV_TAGS] POS_SENSES cdef flags_t[<int>parts_of_speech.N_UNIV_TAGS] POS_SENSES
# Can we not define this compile time? Have to hard code? :( cdef flags_t encode_sense_strs(sense_names) except 0
# DEF INVALID_SENSE = 1 << 54
cdef flags_t encode_sense_strs(sense_names) except 18014398509481984

View File

@ -20,9 +20,6 @@ POS_SENSES[<int>parts_of_speech.EOL] = 0
cdef int _sense = 0 cdef int _sense = 0
for _sense in range(A_behavior, N_act):
POS_SENSES[<int>parts_of_speech.ADJ] |= 1 << _sense
for _sense in range(N_act, V_body): for _sense in range(N_act, V_body):
POS_SENSES[<int>parts_of_speech.NOUN] |= 1 << _sense POS_SENSES[<int>parts_of_speech.NOUN] |= 1 << _sense
@ -30,21 +27,8 @@ for _sense in range(V_body, V_weather+1):
POS_SENSES[<int>parts_of_speech.VERB] |= 1 << _sense POS_SENSES[<int>parts_of_speech.VERB] |= 1 << _sense
STRINGS = ( STRINGS = (
'A_behavior', '-NO_SENSE-',
'A_body',
'A_feeling',
'A_mind',
'A_motion',
'A_perception',
'A_quantity',
'A_relation',
'A_social',
'A_spatial',
'A_substance',
'A_time',
'A_weather',
'N_act', 'N_act',
'N_animal', 'N_animal',
'N_artifact', 'N_artifact',
@ -90,13 +74,14 @@ STRINGS = (
IDS = dict((sense_str, i) for i, sense_str in enumerate(STRINGS)) IDS = dict((sense_str, i) for i, sense_str in enumerate(STRINGS))
cdef flags_t encode_sense_strs(sense_names) except 18014398509481984: cdef flags_t encode_sense_strs(sense_names) except 0:
cdef flags_t sense_bits = 0 cdef flags_t sense_bits = 0
if len(sense_names) == 0:
return sense_bits | (1 << NO_SENSE)
cdef flags_t sense_id = 0 cdef flags_t sense_id = 0
for sense_str in sense_names: for sense_str in sense_names:
if '.' in sense_str: if '.' in sense_str:
sense_str = sense_str[0].upper() + '_' + sense_str.split('.')[1] sense_str = sense_str[0].upper() + '_' + sense_str.split('.')[1]
if sense_str != 'N_Tops': sense_id = IDS[sense_str]
sense_id = IDS[sense_str] sense_bits |= (1 << sense_id)
sense_bits |= (1 << sense_id)
return sense_bits return sense_bits