mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 10:16:27 +03:00
* Remove adjectives from supersense list. This seems to be associated with current memory errors
This commit is contained in:
parent
12dd4f745a
commit
68f174b235
|
@ -3,19 +3,7 @@ cimport parts_of_speech
|
||||||
from .typedefs cimport flags_t
|
from .typedefs cimport flags_t
|
||||||
|
|
||||||
cpdef enum:
|
cpdef enum:
|
||||||
A_behavior
|
NO_SENSE
|
||||||
A_body
|
|
||||||
A_feeling
|
|
||||||
A_mind
|
|
||||||
A_motion
|
|
||||||
A_perception
|
|
||||||
A_quantity
|
|
||||||
A_relation
|
|
||||||
A_social
|
|
||||||
A_spatial
|
|
||||||
A_substance
|
|
||||||
A_time
|
|
||||||
A_weather
|
|
||||||
N_act
|
N_act
|
||||||
N_animal
|
N_animal
|
||||||
N_artifact
|
N_artifact
|
||||||
|
@ -59,11 +47,7 @@ cpdef enum:
|
||||||
N_SENSES
|
N_SENSES
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
cdef flags_t[<int>parts_of_speech.N_UNIV_TAGS] POS_SENSES
|
cdef flags_t[<int>parts_of_speech.N_UNIV_TAGS] POS_SENSES
|
||||||
|
|
||||||
|
|
||||||
# Can we not define this compile time? Have to hard code? :(
|
cdef flags_t encode_sense_strs(sense_names) except 0
|
||||||
# DEF INVALID_SENSE = 1 << 54
|
|
||||||
cdef flags_t encode_sense_strs(sense_names) except 18014398509481984
|
|
||||||
|
|
|
@ -20,9 +20,6 @@ POS_SENSES[<int>parts_of_speech.EOL] = 0
|
||||||
|
|
||||||
cdef int _sense = 0
|
cdef int _sense = 0
|
||||||
|
|
||||||
for _sense in range(A_behavior, N_act):
|
|
||||||
POS_SENSES[<int>parts_of_speech.ADJ] |= 1 << _sense
|
|
||||||
|
|
||||||
for _sense in range(N_act, V_body):
|
for _sense in range(N_act, V_body):
|
||||||
POS_SENSES[<int>parts_of_speech.NOUN] |= 1 << _sense
|
POS_SENSES[<int>parts_of_speech.NOUN] |= 1 << _sense
|
||||||
|
|
||||||
|
@ -30,21 +27,8 @@ for _sense in range(V_body, V_weather+1):
|
||||||
POS_SENSES[<int>parts_of_speech.VERB] |= 1 << _sense
|
POS_SENSES[<int>parts_of_speech.VERB] |= 1 << _sense
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
STRINGS = (
|
STRINGS = (
|
||||||
'A_behavior',
|
'-NO_SENSE-',
|
||||||
'A_body',
|
|
||||||
'A_feeling',
|
|
||||||
'A_mind',
|
|
||||||
'A_motion',
|
|
||||||
'A_perception',
|
|
||||||
'A_quantity',
|
|
||||||
'A_relation',
|
|
||||||
'A_social',
|
|
||||||
'A_spatial',
|
|
||||||
'A_substance',
|
|
||||||
'A_time',
|
|
||||||
'A_weather',
|
|
||||||
'N_act',
|
'N_act',
|
||||||
'N_animal',
|
'N_animal',
|
||||||
'N_artifact',
|
'N_artifact',
|
||||||
|
@ -90,13 +74,14 @@ STRINGS = (
|
||||||
IDS = dict((sense_str, i) for i, sense_str in enumerate(STRINGS))
|
IDS = dict((sense_str, i) for i, sense_str in enumerate(STRINGS))
|
||||||
|
|
||||||
|
|
||||||
cdef flags_t encode_sense_strs(sense_names) except 18014398509481984:
|
cdef flags_t encode_sense_strs(sense_names) except 0:
|
||||||
cdef flags_t sense_bits = 0
|
cdef flags_t sense_bits = 0
|
||||||
|
if len(sense_names) == 0:
|
||||||
|
return sense_bits | (1 << NO_SENSE)
|
||||||
cdef flags_t sense_id = 0
|
cdef flags_t sense_id = 0
|
||||||
for sense_str in sense_names:
|
for sense_str in sense_names:
|
||||||
if '.' in sense_str:
|
if '.' in sense_str:
|
||||||
sense_str = sense_str[0].upper() + '_' + sense_str.split('.')[1]
|
sense_str = sense_str[0].upper() + '_' + sense_str.split('.')[1]
|
||||||
if sense_str != 'N_Tops':
|
sense_id = IDS[sense_str]
|
||||||
sense_id = IDS[sense_str]
|
sense_bits |= (1 << sense_id)
|
||||||
sense_bits |= (1 << sense_id)
|
|
||||||
return sense_bits
|
return sense_bits
|
||||||
|
|
Loading…
Reference in New Issue
Block a user