mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 07:57:35 +03:00 
			
		
		
		
	Remove backoff from .vector to .tensor (#12292)
This commit is contained in:
		
							parent
							
								
									e27c60a702
								
							
						
					
					
						commit
						df4c069a13
					
				|  | @ -657,9 +657,6 @@ cdef class Doc: | ||||||
|             elif self.vocab.vectors.size > 0: |             elif self.vocab.vectors.size > 0: | ||||||
|                 self._vector = sum(t.vector for t in self) / len(self) |                 self._vector = sum(t.vector for t in self) / len(self) | ||||||
|                 return self._vector |                 return self._vector | ||||||
|             elif self.tensor.size > 0: |  | ||||||
|                 self._vector = self.tensor.mean(axis=0) |  | ||||||
|                 return self._vector |  | ||||||
|             else: |             else: | ||||||
|                 return xp.zeros((self.vocab.vectors_length,), dtype="float32") |                 return xp.zeros((self.vocab.vectors_length,), dtype="float32") | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -532,8 +532,6 @@ cdef class Span: | ||||||
|             return self.doc.user_span_hooks["has_vector"](self) |             return self.doc.user_span_hooks["has_vector"](self) | ||||||
|         elif self.vocab.vectors.size > 0: |         elif self.vocab.vectors.size > 0: | ||||||
|             return any(token.has_vector for token in self) |             return any(token.has_vector for token in self) | ||||||
|         elif self.doc.tensor.size > 0: |  | ||||||
|             return True |  | ||||||
|         else: |         else: | ||||||
|             return False |             return False | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -389,8 +389,6 @@ cdef class Token: | ||||||
|         """ |         """ | ||||||
|         if "has_vector" in self.doc.user_token_hooks: |         if "has_vector" in self.doc.user_token_hooks: | ||||||
|             return self.doc.user_token_hooks["has_vector"](self) |             return self.doc.user_token_hooks["has_vector"](self) | ||||||
|         if self.vocab.vectors.size == 0 and self.doc.tensor.size != 0: |  | ||||||
|             return True |  | ||||||
|         return self.vocab.has_vector(self.c.lex.orth) |         return self.vocab.has_vector(self.c.lex.orth) | ||||||
| 
 | 
 | ||||||
|     @property |     @property | ||||||
|  | @ -404,8 +402,6 @@ cdef class Token: | ||||||
|         """ |         """ | ||||||
|         if "vector" in self.doc.user_token_hooks: |         if "vector" in self.doc.user_token_hooks: | ||||||
|             return self.doc.user_token_hooks["vector"](self) |             return self.doc.user_token_hooks["vector"](self) | ||||||
|         if self.vocab.vectors.size == 0 and self.doc.tensor.size != 0: |  | ||||||
|             return self.doc.tensor[self.i] |  | ||||||
|         else: |         else: | ||||||
|             return self.vocab.get_vector(self.c.lex.orth) |             return self.vocab.get_vector(self.c.lex.orth) | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -22,17 +22,20 @@ array([2.02280000e-01,  -7.66180009e-02,   3.70319992e-01, | ||||||
| <Infobox title="Important note" variant="warning"> | <Infobox title="Important note" variant="warning"> | ||||||
| 
 | 
 | ||||||
| To make them compact and fast, spaCy's small [pipeline packages](/models) (all | To make them compact and fast, spaCy's small [pipeline packages](/models) (all | ||||||
| packages that end in `sm`) **don't ship with word vectors**, and only include | packages that end in `sm`) **don't ship with word vectors**. In order to use | ||||||
| context-sensitive **tensors**. This means you can still use the `similarity()` | `similarity()`, you need to download a larger pipeline package that includes | ||||||
| methods to compare documents, spans and tokens – but the result won't be as | vectors: | ||||||
| good, and individual tokens won't have any vectors assigned. So in order to use |  | ||||||
| _real_ word vectors, you need to download a larger pipeline package: |  | ||||||
| 
 | 
 | ||||||
| ```diff | ```diff | ||||||
| - python -m spacy download en_core_web_sm | - python -m spacy download en_core_web_sm | ||||||
| + python -m spacy download en_core_web_lg | + python -m spacy download en_core_web_md | ||||||
| ``` | ``` | ||||||
| 
 | 
 | ||||||
|  | In spaCy v3 and earlier, small pipeline packages supported `similarity()` by | ||||||
|  | backing off to context-sensitive tensors from the `tok2vec` component. These | ||||||
|  | tensors do not work well for this purpose and this backoff has been removed in | ||||||
|  | spaCy v4. | ||||||
|  | 
 | ||||||
| </Infobox> | </Infobox> | ||||||
| 
 | 
 | ||||||
| Pipeline packages that come with built-in word vectors make them available as | Pipeline packages that come with built-in word vectors make them available as | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user