mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	* Remove adjectives from supersense list. This seems to be associated with current memory errors
This commit is contained in:
		
							parent
							
								
									12dd4f745a
								
							
						
					
					
						commit
						68f174b235
					
				| 
						 | 
					@ -3,19 +3,7 @@ cimport parts_of_speech
 | 
				
			||||||
from .typedefs cimport flags_t
 | 
					from .typedefs cimport flags_t
 | 
				
			||||||
 | 
					
 | 
				
			||||||
cpdef enum:
 | 
					cpdef enum:
 | 
				
			||||||
    A_behavior
 | 
					    NO_SENSE
 | 
				
			||||||
    A_body
 | 
					 | 
				
			||||||
    A_feeling
 | 
					 | 
				
			||||||
    A_mind
 | 
					 | 
				
			||||||
    A_motion
 | 
					 | 
				
			||||||
    A_perception
 | 
					 | 
				
			||||||
    A_quantity
 | 
					 | 
				
			||||||
    A_relation
 | 
					 | 
				
			||||||
    A_social
 | 
					 | 
				
			||||||
    A_spatial
 | 
					 | 
				
			||||||
    A_substance
 | 
					 | 
				
			||||||
    A_time
 | 
					 | 
				
			||||||
    A_weather
 | 
					 | 
				
			||||||
    N_act
 | 
					    N_act
 | 
				
			||||||
    N_animal
 | 
					    N_animal
 | 
				
			||||||
    N_artifact
 | 
					    N_artifact
 | 
				
			||||||
| 
						 | 
					@ -59,11 +47,7 @@ cpdef enum:
 | 
				
			||||||
    N_SENSES
 | 
					    N_SENSES
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
cdef flags_t[<int>parts_of_speech.N_UNIV_TAGS] POS_SENSES
 | 
					cdef flags_t[<int>parts_of_speech.N_UNIV_TAGS] POS_SENSES
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Can we not define this compile time? Have to hard code? :(
 | 
					cdef flags_t encode_sense_strs(sense_names) except 0
 | 
				
			||||||
# DEF INVALID_SENSE = 1 << 54
 | 
					 | 
				
			||||||
cdef flags_t encode_sense_strs(sense_names) except 18014398509481984
 | 
					 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -20,9 +20,6 @@ POS_SENSES[<int>parts_of_speech.EOL] = 0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
cdef int _sense = 0
 | 
					cdef int _sense = 0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
for _sense in range(A_behavior, N_act):
 | 
					 | 
				
			||||||
    POS_SENSES[<int>parts_of_speech.ADJ] |= 1 << _sense
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
for _sense in range(N_act, V_body):
 | 
					for _sense in range(N_act, V_body):
 | 
				
			||||||
    POS_SENSES[<int>parts_of_speech.NOUN] |= 1 << _sense
 | 
					    POS_SENSES[<int>parts_of_speech.NOUN] |= 1 << _sense
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -30,21 +27,8 @@ for _sense in range(V_body, V_weather+1):
 | 
				
			||||||
    POS_SENSES[<int>parts_of_speech.VERB] |= 1 << _sense
 | 
					    POS_SENSES[<int>parts_of_speech.VERB] |= 1 << _sense
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
STRINGS = (
 | 
					STRINGS = (
 | 
				
			||||||
    'A_behavior',
 | 
					    '-NO_SENSE-',
 | 
				
			||||||
    'A_body',
 | 
					 | 
				
			||||||
    'A_feeling',
 | 
					 | 
				
			||||||
    'A_mind',
 | 
					 | 
				
			||||||
    'A_motion',
 | 
					 | 
				
			||||||
    'A_perception',
 | 
					 | 
				
			||||||
    'A_quantity',
 | 
					 | 
				
			||||||
    'A_relation',
 | 
					 | 
				
			||||||
    'A_social',
 | 
					 | 
				
			||||||
    'A_spatial',
 | 
					 | 
				
			||||||
    'A_substance',
 | 
					 | 
				
			||||||
    'A_time',
 | 
					 | 
				
			||||||
    'A_weather',
 | 
					 | 
				
			||||||
    'N_act',
 | 
					    'N_act',
 | 
				
			||||||
    'N_animal',
 | 
					    'N_animal',
 | 
				
			||||||
    'N_artifact',
 | 
					    'N_artifact',
 | 
				
			||||||
| 
						 | 
					@ -90,13 +74,14 @@ STRINGS = (
 | 
				
			||||||
IDS = dict((sense_str, i) for i, sense_str in enumerate(STRINGS))
 | 
					IDS = dict((sense_str, i) for i, sense_str in enumerate(STRINGS))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
cdef flags_t encode_sense_strs(sense_names) except 18014398509481984:
 | 
					cdef flags_t encode_sense_strs(sense_names) except 0:
 | 
				
			||||||
    cdef flags_t sense_bits = 0
 | 
					    cdef flags_t sense_bits = 0
 | 
				
			||||||
 | 
					    if len(sense_names) == 0:
 | 
				
			||||||
 | 
					        return sense_bits | (1 << NO_SENSE)
 | 
				
			||||||
    cdef flags_t sense_id = 0
 | 
					    cdef flags_t sense_id = 0
 | 
				
			||||||
    for sense_str in sense_names:
 | 
					    for sense_str in sense_names:
 | 
				
			||||||
        if '.' in sense_str:
 | 
					        if '.' in sense_str:
 | 
				
			||||||
            sense_str = sense_str[0].upper() + '_' + sense_str.split('.')[1]
 | 
					            sense_str = sense_str[0].upper() + '_' + sense_str.split('.')[1]
 | 
				
			||||||
        if sense_str != 'N_Tops':
 | 
					        sense_id = IDS[sense_str]
 | 
				
			||||||
            sense_id = IDS[sense_str]
 | 
					        sense_bits |= (1 << sense_id)
 | 
				
			||||||
            sense_bits |= (1 << sense_id)
 | 
					 | 
				
			||||||
    return sense_bits
 | 
					    return sense_bits
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user