mirror of
https://github.com/explosion/spaCy.git
synced 2025-07-10 16:22:29 +03:00
Remove backoff from .vector to .tensor (#12292)
This commit is contained in:
parent
e27c60a702
commit
df4c069a13
|
@ -657,9 +657,6 @@ cdef class Doc:
|
||||||
elif self.vocab.vectors.size > 0:
|
elif self.vocab.vectors.size > 0:
|
||||||
self._vector = sum(t.vector for t in self) / len(self)
|
self._vector = sum(t.vector for t in self) / len(self)
|
||||||
return self._vector
|
return self._vector
|
||||||
elif self.tensor.size > 0:
|
|
||||||
self._vector = self.tensor.mean(axis=0)
|
|
||||||
return self._vector
|
|
||||||
else:
|
else:
|
||||||
return xp.zeros((self.vocab.vectors_length,), dtype="float32")
|
return xp.zeros((self.vocab.vectors_length,), dtype="float32")
|
||||||
|
|
||||||
|
|
|
@ -532,8 +532,6 @@ cdef class Span:
|
||||||
return self.doc.user_span_hooks["has_vector"](self)
|
return self.doc.user_span_hooks["has_vector"](self)
|
||||||
elif self.vocab.vectors.size > 0:
|
elif self.vocab.vectors.size > 0:
|
||||||
return any(token.has_vector for token in self)
|
return any(token.has_vector for token in self)
|
||||||
elif self.doc.tensor.size > 0:
|
|
||||||
return True
|
|
||||||
else:
|
else:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
|
@ -389,8 +389,6 @@ cdef class Token:
|
||||||
"""
|
"""
|
||||||
if "has_vector" in self.doc.user_token_hooks:
|
if "has_vector" in self.doc.user_token_hooks:
|
||||||
return self.doc.user_token_hooks["has_vector"](self)
|
return self.doc.user_token_hooks["has_vector"](self)
|
||||||
if self.vocab.vectors.size == 0 and self.doc.tensor.size != 0:
|
|
||||||
return True
|
|
||||||
return self.vocab.has_vector(self.c.lex.orth)
|
return self.vocab.has_vector(self.c.lex.orth)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
@ -404,8 +402,6 @@ cdef class Token:
|
||||||
"""
|
"""
|
||||||
if "vector" in self.doc.user_token_hooks:
|
if "vector" in self.doc.user_token_hooks:
|
||||||
return self.doc.user_token_hooks["vector"](self)
|
return self.doc.user_token_hooks["vector"](self)
|
||||||
if self.vocab.vectors.size == 0 and self.doc.tensor.size != 0:
|
|
||||||
return self.doc.tensor[self.i]
|
|
||||||
else:
|
else:
|
||||||
return self.vocab.get_vector(self.c.lex.orth)
|
return self.vocab.get_vector(self.c.lex.orth)
|
||||||
|
|
||||||
|
|
|
@ -22,17 +22,20 @@ array([2.02280000e-01, -7.66180009e-02, 3.70319992e-01,
|
||||||
<Infobox title="Important note" variant="warning">
|
<Infobox title="Important note" variant="warning">
|
||||||
|
|
||||||
To make them compact and fast, spaCy's small [pipeline packages](/models) (all
|
To make them compact and fast, spaCy's small [pipeline packages](/models) (all
|
||||||
packages that end in `sm`) **don't ship with word vectors**, and only include
|
packages that end in `sm`) **don't ship with word vectors**. In order to use
|
||||||
context-sensitive **tensors**. This means you can still use the `similarity()`
|
`similarity()`, you need to download a larger pipeline package that includes
|
||||||
methods to compare documents, spans and tokens – but the result won't be as
|
vectors:
|
||||||
good, and individual tokens won't have any vectors assigned. So in order to use
|
|
||||||
_real_ word vectors, you need to download a larger pipeline package:
|
|
||||||
|
|
||||||
```diff
|
```diff
|
||||||
- python -m spacy download en_core_web_sm
|
- python -m spacy download en_core_web_sm
|
||||||
+ python -m spacy download en_core_web_lg
|
+ python -m spacy download en_core_web_md
|
||||||
```
|
```
|
||||||
|
|
||||||
|
In spaCy v3 and earlier, small pipeline packages supported `similarity()` by
|
||||||
|
backing off to context-sensitive tensors from the `tok2vec` component. These
|
||||||
|
tensors do not work well for this purpose, and this backoff has been removed in
|
||||||
|
spaCy v4.
|
||||||
|
|
||||||
</Infobox>
|
</Infobox>
|
||||||
|
|
||||||
Pipeline packages that come with built-in word vectors make them available as
|
Pipeline packages that come with built-in word vectors make them available as
|
||||||
|
|
Loading…
Reference in New Issue
Block a user