mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 09:57:26 +03:00 
			
		
		
		
	* Rename .repvec to .vector in C API
This commit is contained in:
		
							parent
							
								
									f81389abe0
								
							
						
					
					
						commit
						1e99fcd413
					
				| 
						 | 
					@ -51,7 +51,7 @@ cdef class Lexeme:
 | 
				
			||||||
        def __get__(self):
 | 
					        def __get__(self):
 | 
				
			||||||
            cdef int i
 | 
					            cdef int i
 | 
				
			||||||
            for i in range(self.vocab.vectors_length):
 | 
					            for i in range(self.vocab.vectors_length):
 | 
				
			||||||
                if self.c.repvec[i] != 0:
 | 
					                if self.c.vector[i] != 0:
 | 
				
			||||||
                    return True
 | 
					                    return True
 | 
				
			||||||
            else:
 | 
					            else:
 | 
				
			||||||
                return False
 | 
					                return False
 | 
				
			||||||
| 
						 | 
					@ -74,14 +74,14 @@ cdef class Lexeme:
 | 
				
			||||||
                    "to install the data."
 | 
					                    "to install the data."
 | 
				
			||||||
                )
 | 
					                )
 | 
				
			||||||
 
 | 
					 
 | 
				
			||||||
            repvec_view = <float[:length,]>self.c.repvec
 | 
					            vector_view = <float[:length,]>self.c.vector
 | 
				
			||||||
            return numpy.asarray(repvec_view)
 | 
					            return numpy.asarray(vector_view)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        def __set__(self, vector):
 | 
					        def __set__(self, vector):
 | 
				
			||||||
            assert len(vector) == self.vocab.vectors_length
 | 
					            assert len(vector) == self.vocab.vectors_length
 | 
				
			||||||
            cdef float value
 | 
					            cdef float value
 | 
				
			||||||
            for i, value in enumerate(vector):
 | 
					            for i, value in enumerate(vector):
 | 
				
			||||||
                self.c.repvec[i] = value
 | 
					                self.c.vector[i] = value
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    property repvec:
 | 
					    property repvec:
 | 
				
			||||||
        def __get__(self):
 | 
					        def __get__(self):
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -5,7 +5,7 @@ from .parts_of_speech cimport univ_pos_t
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
cdef struct LexemeC:
 | 
					cdef struct LexemeC:
 | 
				
			||||||
    float* repvec
 | 
					    float* vector
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    flags_t flags
 | 
					    flags_t flags
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -134,10 +134,6 @@ cdef class Doc:
 | 
				
			||||||
            return 0.0
 | 
					            return 0.0
 | 
				
			||||||
        return numpy.dot(self.vector, other.vector) / (self.vector_norm * other.vector_norm)
 | 
					        return numpy.dot(self.vector, other.vector) / (self.vector_norm * other.vector_norm)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    property repvec:
 | 
					 | 
				
			||||||
        def __get__(self):
 | 
					 | 
				
			||||||
            return self.vector
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    property vector:
 | 
					    property vector:
 | 
				
			||||||
        def __get__(self):
 | 
					        def __get__(self):
 | 
				
			||||||
            if self._vector is None:
 | 
					            if self._vector is None:
 | 
				
			||||||
| 
						 | 
					@ -399,7 +395,7 @@ cdef class Doc:
 | 
				
			||||||
            elif attr_id == TAG:
 | 
					            elif attr_id == TAG:
 | 
				
			||||||
                for i in range(length):
 | 
					                for i in range(length):
 | 
				
			||||||
                    self.vocab.morphology.assign_tag(&tokens[i],
 | 
					                    self.vocab.morphology.assign_tag(&tokens[i],
 | 
				
			||||||
                                                     self.vocab.strings[values[i]])
 | 
					                                self.vocab.morphology.reverse_index[values[i]])
 | 
				
			||||||
                    if not self.is_tagged and tokens[i].tag != 0:
 | 
					                    if not self.is_tagged and tokens[i].tag != 0:
 | 
				
			||||||
                        self.is_tagged = True
 | 
					                        self.is_tagged = True
 | 
				
			||||||
            elif attr_id == POS:
 | 
					            elif attr_id == POS:
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -143,7 +143,7 @@ cdef class Token:
 | 
				
			||||||
        def __get__(self):
 | 
					        def __get__(self):
 | 
				
			||||||
            cdef int i
 | 
					            cdef int i
 | 
				
			||||||
            for i in range(self.vocab.vectors_length):
 | 
					            for i in range(self.vocab.vectors_length):
 | 
				
			||||||
                if self.c.lex.repvec[i] != 0:
 | 
					                if self.c.lex.vector[i] != 0:
 | 
				
			||||||
                    return True
 | 
					                    return True
 | 
				
			||||||
            else:
 | 
					            else:
 | 
				
			||||||
                return False
 | 
					                return False
 | 
				
			||||||
| 
						 | 
					@ -158,8 +158,8 @@ cdef class Token:
 | 
				
			||||||
                    "\npython -m spacy.en.download all\n"
 | 
					                    "\npython -m spacy.en.download all\n"
 | 
				
			||||||
                    "to install the data."
 | 
					                    "to install the data."
 | 
				
			||||||
                )
 | 
					                )
 | 
				
			||||||
            repvec_view = <float[:length,]>self.c.lex.repvec
 | 
					            vector_view = <float[:length,]>self.c.lex.vector
 | 
				
			||||||
            return numpy.asarray(repvec_view)
 | 
					            return numpy.asarray(vector_view)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    property repvec:
 | 
					    property repvec:
 | 
				
			||||||
        def __get__(self):
 | 
					        def __get__(self):
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -40,7 +40,7 @@ DEF MAX_VEC_SIZE = 100000
 | 
				
			||||||
cdef float[MAX_VEC_SIZE] EMPTY_VEC
 | 
					cdef float[MAX_VEC_SIZE] EMPTY_VEC
 | 
				
			||||||
memset(EMPTY_VEC, 0, sizeof(EMPTY_VEC))
 | 
					memset(EMPTY_VEC, 0, sizeof(EMPTY_VEC))
 | 
				
			||||||
memset(&EMPTY_LEXEME, 0, sizeof(LexemeC))
 | 
					memset(&EMPTY_LEXEME, 0, sizeof(LexemeC))
 | 
				
			||||||
EMPTY_LEXEME.repvec = EMPTY_VEC
 | 
					EMPTY_LEXEME.vector = EMPTY_VEC
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
cdef class Vocab:
 | 
					cdef class Vocab:
 | 
				
			||||||
| 
						 | 
					@ -162,7 +162,7 @@ cdef class Vocab:
 | 
				
			||||||
        lex.orth = self.strings[string]
 | 
					        lex.orth = self.strings[string]
 | 
				
			||||||
        lex.length = len(string)
 | 
					        lex.length = len(string)
 | 
				
			||||||
        lex.id = self.length
 | 
					        lex.id = self.length
 | 
				
			||||||
        lex.repvec = <float*>mem.alloc(self.vectors_length, sizeof(float))
 | 
					        lex.vector = <float*>mem.alloc(self.vectors_length, sizeof(float))
 | 
				
			||||||
        if self.get_lex_attr is not None:
 | 
					        if self.get_lex_attr is not None:
 | 
				
			||||||
            for attr, func in self.get_lex_attr.items():
 | 
					            for attr, func in self.get_lex_attr.items():
 | 
				
			||||||
                value = func(string)
 | 
					                value = func(string)
 | 
				
			||||||
| 
						 | 
					@ -287,7 +287,7 @@ cdef class Vocab:
 | 
				
			||||||
            fp.read_into(&lexeme.sentiment, 1, sizeof(lexeme.sentiment))
 | 
					            fp.read_into(&lexeme.sentiment, 1, sizeof(lexeme.sentiment))
 | 
				
			||||||
            fp.read_into(&lexeme.l2_norm, 1, sizeof(lexeme.l2_norm))
 | 
					            fp.read_into(&lexeme.l2_norm, 1, sizeof(lexeme.l2_norm))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            lexeme.repvec = EMPTY_VEC
 | 
					            lexeme.vector = EMPTY_VEC
 | 
				
			||||||
            py_str = self.strings[lexeme.orth]
 | 
					            py_str = self.strings[lexeme.orth]
 | 
				
			||||||
            key = hash_string(py_str)
 | 
					            key = hash_string(py_str)
 | 
				
			||||||
            self._by_hash.set(key, lexeme)
 | 
					            self._by_hash.set(key, lexeme)
 | 
				
			||||||
| 
						 | 
					@ -306,7 +306,7 @@ cdef class Vocab:
 | 
				
			||||||
        cdef CFile out_file = CFile(out_loc, 'wb')
 | 
					        cdef CFile out_file = CFile(out_loc, 'wb')
 | 
				
			||||||
        for lexeme in self:
 | 
					        for lexeme in self:
 | 
				
			||||||
            word_str = lexeme.orth_.encode('utf8')
 | 
					            word_str = lexeme.orth_.encode('utf8')
 | 
				
			||||||
            vec = lexeme.c.repvec
 | 
					            vec = lexeme.c.vector
 | 
				
			||||||
            word_len = len(word_str)
 | 
					            word_len = len(word_str)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            out_file.write_from(&word_len, 1, sizeof(word_len))
 | 
					            out_file.write_from(&word_len, 1, sizeof(word_len))
 | 
				
			||||||
| 
						 | 
					@ -331,10 +331,10 @@ cdef class Vocab:
 | 
				
			||||||
                                                        vec_len, len(pieces))
 | 
					                                                        vec_len, len(pieces))
 | 
				
			||||||
            orth = self.strings[word_str]
 | 
					            orth = self.strings[word_str]
 | 
				
			||||||
            lexeme = <LexemeC*><void*>self.get_by_orth(self.mem, orth)
 | 
					            lexeme = <LexemeC*><void*>self.get_by_orth(self.mem, orth)
 | 
				
			||||||
            lexeme.repvec = <float*>self.mem.alloc(self.vectors_length, sizeof(float))
 | 
					            lexeme.vector = <float*>self.mem.alloc(self.vectors_length, sizeof(float))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            for i, val_str in enumerate(pieces):
 | 
					            for i, val_str in enumerate(pieces):
 | 
				
			||||||
                lexeme.repvec[i] = float(val_str)
 | 
					                lexeme.vector[i] = float(val_str)
 | 
				
			||||||
        return vec_len
 | 
					        return vec_len
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def load_vectors_from_bin_loc(self, loc):
 | 
					    def load_vectors_from_bin_loc(self, loc):
 | 
				
			||||||
| 
						 | 
					@ -376,12 +376,12 @@ cdef class Vocab:
 | 
				
			||||||
        for orth, lex_addr in self._by_orth.items():
 | 
					        for orth, lex_addr in self._by_orth.items():
 | 
				
			||||||
            lex = <LexemeC*>lex_addr
 | 
					            lex = <LexemeC*>lex_addr
 | 
				
			||||||
            if lex.lower < vectors.size():
 | 
					            if lex.lower < vectors.size():
 | 
				
			||||||
                lex.repvec = vectors[lex.lower]
 | 
					                lex.vector = vectors[lex.lower]
 | 
				
			||||||
                for i in range(vec_len):
 | 
					                for i in range(vec_len):
 | 
				
			||||||
                    lex.l2_norm += (lex.repvec[i] * lex.repvec[i])
 | 
					                    lex.l2_norm += (lex.vector[i] * lex.vector[i])
 | 
				
			||||||
                lex.l2_norm = math.sqrt(lex.l2_norm)
 | 
					                lex.l2_norm = math.sqrt(lex.l2_norm)
 | 
				
			||||||
            else:
 | 
					            else:
 | 
				
			||||||
                lex.repvec = EMPTY_VEC
 | 
					                lex.vector = EMPTY_VEC
 | 
				
			||||||
        return vec_len
 | 
					        return vec_len
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user