mirror of
https://github.com/explosion/spaCy.git
synced 2025-05-30 02:33:07 +03:00
Update docstrings and formatting
This commit is contained in:
parent
0de8d213a3
commit
ba2e6c8c6f
|
@ -28,7 +28,7 @@ cdef class Vectors:
|
||||||
instance of numpy.ndarray (for CPU vectors) or cupy.ndarray
|
instance of numpy.ndarray (for CPU vectors) or cupy.ndarray
|
||||||
(for GPU vectors). `vectors.key2row` is a dictionary mapping word hashes to
|
(for GPU vectors). `vectors.key2row` is a dictionary mapping word hashes to
|
||||||
rows in the vectors.data table.
|
rows in the vectors.data table.
|
||||||
|
|
||||||
Multiple keys can be mapped to the same vector, and not all of the rows in
|
Multiple keys can be mapped to the same vector, and not all of the rows in
|
||||||
the table need to be assigned --- so len(list(vectors.keys())) may be
|
the table need to be assigned --- so len(list(vectors.keys())) may be
|
||||||
greater or smaller than vectors.shape[0].
|
greater or smaller than vectors.shape[0].
|
||||||
|
@ -39,9 +39,10 @@ cdef class Vectors:
|
||||||
|
|
||||||
def __init__(self, *, shape=None, data=None, keys=None):
|
def __init__(self, *, shape=None, data=None, keys=None):
|
||||||
"""Create a new vector store.
|
"""Create a new vector store.
|
||||||
|
|
||||||
shape (tuple): Size of the table, as (# entries, # columns)
|
shape (tuple): Size of the table, as (# entries, # columns)
|
||||||
data (numpy.ndarray): The vector data.
|
data (numpy.ndarray): The vector data.
|
||||||
|
keys (iterable): A sequence of keys, aligned with the data.
|
||||||
RETURNS (Vectors): The newly created object.
|
RETURNS (Vectors): The newly created object.
|
||||||
"""
|
"""
|
||||||
if data is None:
|
if data is None:
|
||||||
|
@ -57,7 +58,7 @@ cdef class Vectors:
|
||||||
if keys is not None:
|
if keys is not None:
|
||||||
for i, key in enumerate(keys):
|
for i, key in enumerate(keys):
|
||||||
self.add(key, row=i)
|
self.add(key, row=i)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def shape(self):
|
def shape(self):
|
||||||
"""Get `(rows, dims)` tuples of number of rows and number of dimensions
|
"""Get `(rows, dims)` tuples of number of rows and number of dimensions
|
||||||
|
@ -102,7 +103,7 @@ cdef class Vectors:
|
||||||
"""Set a vector for the given key.
|
"""Set a vector for the given key.
|
||||||
|
|
||||||
key (int): The key to set the vector for.
|
key (int): The key to set the vector for.
|
||||||
vector (numpy.ndarray): The vector to set.
|
vector (ndarray): The vector to set.
|
||||||
"""
|
"""
|
||||||
i = self.key2row[key]
|
i = self.key2row[key]
|
||||||
self.data[i] = vector
|
self.data[i] = vector
|
||||||
|
@ -110,9 +111,9 @@ cdef class Vectors:
|
||||||
self._unset.remove(i)
|
self._unset.remove(i)
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
"""Yield vectors from the table.
|
"""Iterate over the keys in the table.
|
||||||
|
|
||||||
YIELDS (ndarray): A vector.
|
YIELDS (int): A key in the table.
|
||||||
"""
|
"""
|
||||||
yield from self.key2row
|
yield from self.key2row
|
||||||
|
|
||||||
|
@ -132,14 +133,14 @@ cdef class Vectors:
|
||||||
return key in self.key2row
|
return key in self.key2row
|
||||||
|
|
||||||
def resize(self, shape, inplace=False):
|
def resize(self, shape, inplace=False):
|
||||||
'''Resize the underlying vectors array. If inplace=True, the memory
|
"""Resize the underlying vectors array. If inplace=True, the memory
|
||||||
is reallocated. This may cause other references to the data to become
|
is reallocated. This may cause other references to the data to become
|
||||||
invalid, so only use inplace=True if you're sure that's what you want.
|
invalid, so only use inplace=True if you're sure that's what you want.
|
||||||
|
|
||||||
If the number of vectors is reduced, keys mapped to rows that have been
|
If the number of vectors is reduced, keys mapped to rows that have been
|
||||||
deleted are removed. These removed items are returned as a list of
|
deleted are removed. These removed items are returned as a list of
|
||||||
(key, row) tuples.
|
`(key, row)` tuples.
|
||||||
'''
|
"""
|
||||||
if inplace:
|
if inplace:
|
||||||
self.data.resize(shape, refcheck=False)
|
self.data.resize(shape, refcheck=False)
|
||||||
else:
|
else:
|
||||||
|
@ -153,16 +154,22 @@ cdef class Vectors:
|
||||||
self.key2row.pop(key)
|
self.key2row.pop(key)
|
||||||
removed_items.append((key, row))
|
removed_items.append((key, row))
|
||||||
return removed_items
|
return removed_items
|
||||||
|
|
||||||
def keys(self):
|
def keys(self):
|
||||||
'''Iterate over the keys in the table.'''
|
"""A sequence of the keys in the table.
|
||||||
yield from self.key2row.keys()
|
|
||||||
|
RETURNS (iterable): The keys.
|
||||||
|
"""
|
||||||
|
return self.key2row.keys()
|
||||||
|
|
||||||
def values(self):
|
def values(self):
|
||||||
'''Iterate over vectors that have been assigned to at least one key.
|
"""Iterate over vectors that have been assigned to at least one key.
|
||||||
|
|
||||||
Note that some vectors may be unassigned, so the number of vectors
|
Note that some vectors may be unassigned, so the number of vectors
|
||||||
returned may be less than the length of the vectors table.'''
|
returned may be less than the length of the vectors table.
|
||||||
|
|
||||||
|
YIELDS (ndarray): A vector in the table.
|
||||||
|
"""
|
||||||
for row, vector in enumerate(range(self.data.shape[0])):
|
for row, vector in enumerate(range(self.data.shape[0])):
|
||||||
if row not in self._unset:
|
if row not in self._unset:
|
||||||
yield vector
|
yield vector
|
||||||
|
@ -208,12 +215,12 @@ cdef class Vectors:
|
||||||
if row in self._unset:
|
if row in self._unset:
|
||||||
self._unset.remove(row)
|
self._unset.remove(row)
|
||||||
return row
|
return row
|
||||||
|
|
||||||
def most_similar(self, queries, *, return_scores=False, return_rows=False,
|
def most_similar(self, queries, *, return_scores=False, return_rows=False,
|
||||||
batch_size=1024):
|
batch_size=1024):
|
||||||
'''For each of the given vectors, find the single entry most similar
|
'''For each of the given vectors, find the single entry most similar
|
||||||
to it, by cosine.
|
to it, by cosine.
|
||||||
|
|
||||||
Queries are by vector. Results are returned as an array of keys,
|
Queries are by vector. Results are returned as an array of keys,
|
||||||
or a tuple of (keys, scores) if return_scores=True. If `queries` is
|
or a tuple of (keys, scores) if return_scores=True. If `queries` is
|
||||||
large, the calculations are performed in chunks, to avoid consuming
|
large, the calculations are performed in chunks, to avoid consuming
|
||||||
|
@ -221,9 +228,9 @@ cdef class Vectors:
|
||||||
trade-off during the calculations.
|
trade-off during the calculations.
|
||||||
'''
|
'''
|
||||||
xp = get_array_module(self.data)
|
xp = get_array_module(self.data)
|
||||||
|
|
||||||
vectors = self.data / xp.linalg.norm(self.data, axis=1, keepdims=True)
|
vectors = self.data / xp.linalg.norm(self.data, axis=1, keepdims=True)
|
||||||
|
|
||||||
best_rows = xp.zeros((queries.shape[0],), dtype='i')
|
best_rows = xp.zeros((queries.shape[0],), dtype='i')
|
||||||
scores = xp.zeros((queries.shape[0],), dtype='f')
|
scores = xp.zeros((queries.shape[0],), dtype='f')
|
||||||
# Work in batches, to avoid memory problems.
|
# Work in batches, to avoid memory problems.
|
||||||
|
|
Loading…
Reference in New Issue
Block a user