mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-24 00:46:28 +03:00
Fix Vectors.n_keys for floret vectors (#10394)
Fix `Vectors.n_keys` for floret vectors to match docstring description and avoid W007 warnings in similarity methods.
This commit is contained in:
parent
3f68bbcfec
commit
8e93fa8507
|
@ -535,6 +535,10 @@ def test_floret_vectors(floret_vectors_vec_str, floret_vectors_hashvec_str):
|
|||
# every word has a vector
|
||||
assert nlp.vocab[word * 5].has_vector
|
||||
|
||||
# n_keys is -1 for floret
|
||||
assert nlp_plain.vocab.vectors.n_keys > 0
|
||||
assert nlp.vocab.vectors.n_keys == -1
|
||||
|
||||
# check that single and batched vector lookups are identical
|
||||
words = [s for s in nlp_plain.vocab.vectors]
|
||||
single_vecs = OPS.to_numpy(OPS.asarray([nlp.vocab[word].vector for word in words]))
|
||||
|
|
|
@ -170,6 +170,8 @@ cdef class Vectors:
|
|||
|
||||
DOCS: https://spacy.io/api/vectors#n_keys
|
||||
"""
|
||||
if self.mode == Mode.floret:
|
||||
return -1
|
||||
return len(self.key2row)
|
||||
|
||||
def __reduce__(self):
|
||||
|
|
|
@ -327,9 +327,9 @@ will be counted individually. In `floret` mode, the keys table is not used.
|
|||
> assert vectors.n_keys == 0
|
||||
> ```
|
||||
|
||||
| Name | Description |
|
||||
| ----------- | -------------------------------------------- |
|
||||
| **RETURNS** | The number of all keys in the table. ~~int~~ |
|
||||
| Name | Description |
|
||||
| ----------- | ----------------------------------------------------------------------------- |
|
||||
| **RETURNS** | The number of all keys in the table. Returns `-1` for floret vectors. ~~int~~ |
|
||||
|
||||
## Vectors.most_similar {#most_similar tag="method"}
|
||||
|
||||
|
@ -348,7 +348,7 @@ supported for `floret` mode.
|
|||
> ```
|
||||
|
||||
| Name | Description |
|
||||
| -------------- | --------------------------------------------------------------------------- |
|
||||
| -------------- | --------------------------------------------------------------------------- |
|
||||
| `queries` | An array with one or more vectors. ~~numpy.ndarray~~ |
|
||||
| _keyword-only_ | |
|
||||
| `batch_size` | The batch size to use. Defaults to `1024`. ~~int~~ |
|
||||
|
@ -385,7 +385,7 @@ Change the embedding matrix to use different Thinc ops.
|
|||
> ```
|
||||
|
||||
| Name | Description |
|
||||
|-------|----------------------------------------------------------|
|
||||
| ----- | -------------------------------------------------------- |
|
||||
| `ops` | The Thinc ops to switch the embedding matrix to. ~~Ops~~ |
|
||||
|
||||
## Vectors.to_disk {#to_disk tag="method"}
|
||||
|
|
Loading…
Reference in New Issue
Block a user