mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-13 10:46:29 +03:00
Update Vectors API docs
This commit is contained in:
parent
9eb998443f
commit
0d8f4a534b
|
@ -5,46 +5,47 @@ include ../_includes/_mixins
|
||||||
p
|
p
|
||||||
| Vectors data is kept in the #[code Vectors.data] attribute, which should
|
| Vectors data is kept in the #[code Vectors.data] attribute, which should
|
||||||
| be an instance of #[code numpy.ndarray] (for CPU vectors) or
|
| be an instance of #[code numpy.ndarray] (for CPU vectors) or
|
||||||
| #[code cupy.ndarray] (for GPU vectors).
|
| #[code cupy.ndarray] (for GPU vectors). Multiple keys can be mapped to
|
||||||
|
| the same vector, and not all of the rows in the table need to be
|
||||||
|
| assigned – so #[code vectors.n_keys] may be greater or smaller than
|
||||||
|
| #[code vectors.shape[0]].
|
||||||
|
|
||||||
+h(2, "init") Vectors.__init__
|
+h(2, "init") Vectors.__init__
|
||||||
+tag method
|
+tag method
|
||||||
|
|
||||||
p
|
p
|
||||||
| Create a new vector store. To keep the vector table empty, pass
|
| Create a new vector store. You can set the vector values and keys
|
||||||
| #[code width=0]. You can also create the vector table and add
|
| directly on initialisation, or supply a #[code shape] keyword argument
|
||||||
| vectors one by one, or set the vector values directly on initialisation.
|
| to create an empty table you can add vectors to later.
|
||||||
|
|
||||||
+aside-code("Example").
|
+aside-code("Example").
|
||||||
from spacy.vectors import Vectors
|
from spacy.vectors import Vectors
|
||||||
from spacy.strings import StringStore
|
|
||||||
|
|
||||||
empty_vectors = Vectors(StringStore())
|
empty_vectors = Vectors(shape=(10000, 300))
|
||||||
|
|
||||||
vectors = Vectors([u'cat'], width=300)
|
data = numpy.zeros((3, 300), dtype='f')
|
||||||
vectors[u'cat'] = numpy.random.uniform(-1, 1, (300,))
|
keys = [u'cat', u'dog', u'rat']
|
||||||
|
vectors = Vectors(data=data, keys=keys)
|
||||||
vector_table = numpy.zeros((3, 300), dtype='f')
|
|
||||||
vectors = Vectors(StringStore(), data=vector_table)
|
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
+table(["Name", "Type", "Description"])
|
||||||
+row
|
|
||||||
+cell #[code strings]
|
|
||||||
+cell #[code StringStore] or list
|
|
||||||
+cell
|
|
||||||
| List of strings, or a #[+api("stringstore") #[code StringStore]]
|
|
||||||
| that maps strings to hash values, and vice versa.
|
|
||||||
|
|
||||||
+row
|
|
||||||
+cell #[code width]
|
|
||||||
+cell int
|
|
||||||
+cell Number of dimensions.
|
|
||||||
|
|
||||||
+row
|
+row
|
||||||
+cell #[code data]
|
+cell #[code data]
|
||||||
+cell #[code.u-break numpy.ndarray[ndim=1, dtype='float32']]
|
+cell #[code.u-break ndarray[ndim=1, dtype='float32']]
|
||||||
+cell The vector data.
|
+cell The vector data.
|
||||||
|
|
||||||
|
+row
|
||||||
|
+cell #[code keys]
|
||||||
|
+cell iterable
|
||||||
|
+cell A sequence of keys aligned with the data.
|
||||||
|
|
||||||
|
+row
|
||||||
|
+cell #[code shape]
|
||||||
|
+cell tuple
|
||||||
|
+cell
|
||||||
|
| Size of the table as #[code (n_entries, n_columns)], the number
|
||||||
|
| of entries and number of columns. Not required if you're
|
||||||
|
| initialising the object with #[code data] and #[code keys].
|
||||||
|
|
||||||
+row("foot")
|
+row("foot")
|
||||||
+cell returns
|
+cell returns
|
||||||
+cell #[code Vectors]
|
+cell #[code Vectors]
|
||||||
|
@ -54,97 +55,92 @@ p
|
||||||
+tag method
|
+tag method
|
||||||
|
|
||||||
p
|
p
|
||||||
| Get a vector by key. If key is a string, it is hashed to an integer ID
|
| Get a vector by key. If the key is not found in the table, a
|
||||||
| using the #[code Vectors.strings] table. If the integer key is not found
|
| #[code KeyError] is raised.
|
||||||
| in the table, a #[code KeyError] is raised.
|
|
||||||
|
|
||||||
+aside-code("Example").
|
+aside-code("Example").
|
||||||
vectors = Vectors(StringStore(), 300)
|
cat_id = nlp.vocab.strings[u'cat']
|
||||||
vectors.add(u'cat', numpy.random.uniform(-1, 1, (300,)))
|
cat_vector = nlp.vocab.vectors[cat_id]
|
||||||
cat_vector = vectors[u'cat']
|
assert cat_vector == nlp.vocab[u'cat'].vector
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
+table(["Name", "Type", "Description"])
|
||||||
+row
|
+row
|
||||||
+cell #[code key]
|
+cell #[code key]
|
||||||
+cell unicode / int
|
+cell int
|
||||||
+cell The key to get the vector for.
|
+cell The key to get the vector for.
|
||||||
|
|
||||||
+row
|
+row
|
||||||
+cell returns
|
+cell returns
|
||||||
+cell #[code.u-break numpy.ndarray[ndim=1, dtype='float32']]
|
+cell #[code.u-break ndarray[ndim=1, dtype='float32']]
|
||||||
+cell The vector for the key.
|
+cell The vector for the key.
|
||||||
|
|
||||||
+h(2, "setitem") Vectors.__setitem__
|
+h(2, "setitem") Vectors.__setitem__
|
||||||
+tag method
|
+tag method
|
||||||
|
|
||||||
p
|
p
|
||||||
| Set a vector for the given key. If key is a string, it is hashed to an
|
| Set a vector for the given key.
|
||||||
| integer ID using the #[code Vectors.strings] table.
|
|
||||||
|
|
||||||
+aside-code("Example").
|
+aside-code("Example").
|
||||||
vectors = Vectors(StringStore(), 300)
|
cat_id = nlp.vocab.strings[u'cat']
|
||||||
vectors[u'cat'] = numpy.random.uniform(-1, 1, (300,))
|
vector = numpy.random.uniform(-1, 1, (300,))
|
||||||
|
nlp.vocab.vectors[cat_id] = vector
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
+table(["Name", "Type", "Description"])
|
||||||
+row
|
+row
|
||||||
+cell #[code key]
|
+cell #[code key]
|
||||||
+cell unicode / int
|
+cell int
|
||||||
+cell The key to set the vector for.
|
+cell The key to set the vector for.
|
||||||
|
|
||||||
+row
|
+row
|
||||||
+cell #[code vector]
|
+cell #[code vector]
|
||||||
+cell #[code.u-break numpy.ndarray[ndim=1, dtype='float32']]
|
+cell #[code.u-break ndarray[ndim=1, dtype='float32']]
|
||||||
+cell The vector to set.
|
+cell The vector to set.
|
||||||
|
|
||||||
+h(2, "iter") Vectors.__iter__
|
+h(2, "iter") Vectors.__iter__
|
||||||
+tag method
|
+tag method
|
||||||
|
|
||||||
p Yield vectors from the table.
|
p Iterate over the keys in the table.
|
||||||
|
|
||||||
+aside-code("Example").
|
+aside-code("Example").
|
||||||
vector_table = numpy.zeros((3, 300), dtype='f')
|
for key in nlp.vocab.vectors:
|
||||||
vectors = Vectors(StringStore(), vector_table)
|
print(key, nlp.vocab.strings[key])
|
||||||
for vector in vectors:
|
|
||||||
print(vector)
|
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
+table(["Name", "Type", "Description"])
|
||||||
+row("foot")
|
+row("foot")
|
||||||
+cell yields
|
+cell yields
|
||||||
+cell #[code.u-break numpy.ndarray[ndim=1, dtype='float32']]
|
+cell int
|
||||||
+cell A vector from the table.
|
+cell A key in the table.
|
||||||
|
|
||||||
+h(2, "len") Vectors.__len__
|
+h(2, "len") Vectors.__len__
|
||||||
+tag method
|
+tag method
|
||||||
|
|
||||||
p Return the number of vectors that have been assigned.
|
p Return the number of vectors in the table.
|
||||||
|
|
||||||
+aside-code("Example").
|
+aside-code("Example").
|
||||||
vector_table = numpy.zeros((3, 300), dtype='f')
|
vectors = Vectors(shape=(3, 300))
|
||||||
vectors = Vectors(StringStore(), vector_table)
|
|
||||||
assert len(vectors) == 3
|
assert len(vectors) == 3
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
+table(["Name", "Type", "Description"])
|
||||||
+row("foot")
|
+row("foot")
|
||||||
+cell returns
|
+cell returns
|
||||||
+cell int
|
+cell int
|
||||||
+cell The number of vectors in the data.
|
+cell The number of vectors in the table.
|
||||||
|
|
||||||
+h(2, "contains") Vectors.__contains__
|
+h(2, "contains") Vectors.__contains__
|
||||||
+tag method
|
+tag method
|
||||||
|
|
||||||
p
|
p
|
||||||
| Check whether a key has a vector entry in the table. If key is a string,
|
| Check whether a key has been mapped to a vector entry in the table.
|
||||||
| it is hashed to an integer ID using the #[code Vectors.strings] table.
|
|
||||||
|
|
||||||
+aside-code("Example").
|
+aside-code("Example").
|
||||||
vectors = Vectors(StringStore(), 300)
|
cat_id = nlp.vocab.strings[u'cat']
|
||||||
vectors.add(u'cat', numpy.random.uniform(-1, 1, (300,)))
|
nlp.vocab.vectors.add(cat_id, numpy.random.uniform(-1, 1, (300,)))
|
||||||
assert u'cat' in vectors
|
assert cat_id in nlp.vocab.vectors
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
+table(["Name", "Type", "Description"])
|
||||||
+row
|
+row
|
||||||
+cell #[code key]
|
+cell #[code key]
|
||||||
+cell unicode / int
|
+cell int
|
||||||
+cell The key to check.
|
+cell The key to check.
|
||||||
|
|
||||||
+row("foot")
|
+row("foot")
|
||||||
|
@ -156,13 +152,20 @@ p
|
||||||
+tag method
|
+tag method
|
||||||
|
|
||||||
p
|
p
|
||||||
| Add a key to the table, optionally setting a vector value as well. If
|
| Add a key to the table, optionally setting a vector value as well. Keys
|
||||||
| key is a string, it is hashed to an integer ID using the
|
| can be mapped to an existing vector by setting #[code row], or a new
|
||||||
| #[code Vectors.strings] table.
|
| vector can be added. When adding unicode keys, keep in mind that the
|
||||||
|
| #[code Vectors] class itself has no
|
||||||
|
| #[+api("stringstore") #[code StringStore]], so you have to store the
|
||||||
|
| hash-to-string mapping separately. If you need to manage the strings,
|
||||||
|
| you should use the #[code Vectors] via the
|
||||||
|
| #[+api("vocab") #[code Vocab]] class, e.g. #[code vocab.vectors].
|
||||||
|
|
||||||
+aside-code("Example").
|
+aside-code("Example").
|
||||||
vectors = Vectors(StringStore(), 300)
|
vector = numpy.random.uniform(-1, 1, (300,))
|
||||||
vectors.add(u'cat', numpy.random.uniform(-1, 1, (300,)))
|
cat_id = nlp.vocab.strings[u'cat']
|
||||||
|
nlp.vocab.vectors.add(cat_id, vector=vector)
|
||||||
|
nlp.vocab.vectors.add(u'dog', row=0)
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
+table(["Name", "Type", "Description"])
|
||||||
+row
|
+row
|
||||||
|
@ -172,25 +175,66 @@ p
|
||||||
|
|
||||||
+row
|
+row
|
||||||
+cell #[code vector]
|
+cell #[code vector]
|
||||||
+cell #[code.u-break numpy.ndarray[ndim=1, dtype='float32']]
|
+cell #[code.u-break ndarray[ndim=1, dtype='float32']]
|
||||||
+cell An optional vector to add.
|
+cell An optional vector to add for the key.
|
||||||
|
|
||||||
|
+row
|
||||||
|
+cell #[code row]
|
||||||
|
+cell int
|
||||||
|
+cell An optional row number of a vector to map the key to.
|
||||||
|
|
||||||
|
+row("foot")
|
||||||
|
+cell returns
|
||||||
|
+cell int
|
||||||
|
+cell The row the vector was added to.
|
||||||
|
|
||||||
|
+h(2, "keys") Vectors.keys
|
||||||
|
+tag method
|
||||||
|
|
||||||
|
p A sequence of the keys in the table.
|
||||||
|
|
||||||
|
+aside-code("Example").
|
||||||
|
for key in nlp.vocab.vectors.keys():
|
||||||
|
print(key, nlp.vocab.strings[key])
|
||||||
|
|
||||||
|
+table(["Name", "Type", "Description"])
|
||||||
|
+row("foot")
|
||||||
|
+cell returns
|
||||||
|
+cell iterable
|
||||||
|
+cell The keys.
|
||||||
|
|
||||||
|
+h(2, "values") Vectors.values
|
||||||
|
+tag method
|
||||||
|
|
||||||
|
p
|
||||||
|
| Iterate over vectors that have been assigned to at least one key. Note
|
||||||
|
| that some vectors may be unassigned, so the number of vectors returned
|
||||||
|
| may be less than the length of the vectors table.
|
||||||
|
|
||||||
|
+aside-code("Example").
|
||||||
|
for vector in nlp.vocab.vectors.values():
|
||||||
|
print(vector)
|
||||||
|
|
||||||
|
+table(["Name", "Type", "Description"])
|
||||||
|
+row("foot")
|
||||||
|
+cell yields
|
||||||
|
+cell #[code.u-break ndarray[ndim=1, dtype='float32']]
|
||||||
|
+cell A vector in the table.
|
||||||
|
|
||||||
+h(2, "items") Vectors.items
|
+h(2, "items") Vectors.items
|
||||||
+tag method
|
+tag method
|
||||||
|
|
||||||
p Iterate over #[code (string key, vector)] pairs, in order.
|
p Iterate over #[code (key, vector)] pairs, in order.
|
||||||
|
|
||||||
+aside-code("Example").
|
+aside-code("Example").
|
||||||
vectors = Vectors(StringStore(), 300)
|
for key, vector in nlp.vocab.vectors.items():
|
||||||
vectors.add(u'cat', numpy.random.uniform(-1, 1, (300,)))
|
print(key, nlp.vocab.strings[key], vector)
|
||||||
for key, vector in vectors.items():
|
|
||||||
print(key, vector)
|
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
+table(["Name", "Type", "Description"])
|
||||||
+row("foot")
|
+row("foot")
|
||||||
+cell yields
|
+cell yields
|
||||||
+cell tuple
|
+cell tuple
|
||||||
+cell #[code (string key, vector)] pairs, in order.
|
+cell #[code (key, vector)] pairs, in order.
|
||||||
|
|
||||||
+h(2, "shape") Vectors.shape
|
+h(2, "shape") Vectors.shape
|
||||||
+tag property
|
+tag property
|
||||||
|
@ -200,7 +244,7 @@ p
|
||||||
| dimensions in the vector table.
|
| dimensions in the vector table.
|
||||||
|
|
||||||
+aside-code("Example").
|
+aside-code("Example").
|
||||||
vectors = Vectors(StringStore(), 300)
|
vectors = Vectors(shape=(1, 300))
|
||||||
vectors.add(u'cat', numpy.random.uniform(-1, 1, (300,)))
|
vectors.add(u'cat', numpy.random.uniform(-1, 1, (300,)))
|
||||||
rows, dims = vectors.shape
|
rows, dims = vectors.shape
|
||||||
assert rows == 1
|
assert rows == 1
|
||||||
|
@ -212,6 +256,59 @@ p
|
||||||
+cell tuple
|
+cell tuple
|
||||||
+cell A #[code (rows, dims)] pair.
|
+cell A #[code (rows, dims)] pair.
|
||||||
|
|
||||||
|
+h(2, "size") Vectors.size
|
||||||
|
+tag property
|
||||||
|
|
||||||
|
p The vector size, i.e. #[code rows * dims].
|
||||||
|
|
||||||
|
+aside-code("Example").
|
||||||
|
vectors = Vectors(shape=(500, 300))
|
||||||
|
assert vectors.size == 150000
|
||||||
|
|
||||||
|
+table(["Name", "Type", "Description"])
|
||||||
|
+row("foot")
|
||||||
|
+cell returns
|
||||||
|
+cell int
|
||||||
|
+cell The vector size.
|
||||||
|
|
||||||
|
+h(2, "is_full") Vectors.is_full
|
||||||
|
+tag property
|
||||||
|
|
||||||
|
p
|
||||||
|
| Whether the vectors table is full and has no slots available for new
|
||||||
|
| keys. If a table is full, it can be resized using
|
||||||
|
| #[+api("vectors#resize") #[code Vectors.resize]].
|
||||||
|
|
||||||
|
+aside-code("Example").
|
||||||
|
vectors = Vectors(shape=(1, 300))
|
||||||
|
vectors.add(u'cat', numpy.random.uniform(-1, 1, (300,)))
|
||||||
|
assert vectors.is_full
|
||||||
|
|
||||||
|
+table(["Name", "Type", "Description"])
|
||||||
|
+row("foot")
|
||||||
|
+cell returns
|
||||||
|
+cell bool
|
||||||
|
+cell Whether the vectors table is full.
|
||||||
|
|
||||||
|
+h(2, "n_keys") Vectors.n_keys
|
||||||
|
+tag property
|
||||||
|
|
||||||
|
p
|
||||||
|
| Get the number of keys in the table. Note that this is the number of
|
||||||
|
| #[em all] keys, not just unique vectors. If several keys are mapped
|
||||||
|
| to the same vectors, they will be counted individually.
|
||||||
|
|
||||||
|
+aside-code("Example").
|
||||||
|
vectors = Vectors(shape=(10, 300))
|
||||||
|
assert len(vectors) == 10
|
||||||
|
assert vectors.n_keys == 0
|
||||||
|
|
||||||
|
+table(["Name", "Type", "Description"])
|
||||||
|
+row("foot")
|
||||||
|
+cell returns
|
||||||
|
+cell int
|
||||||
|
+cell The number of all keys in the table.
|
||||||
|
|
||||||
+h(2, "from_glove") Vectors.from_glove
|
+h(2, "from_glove") Vectors.from_glove
|
||||||
+tag method
|
+tag method
|
||||||
|
|
||||||
|
@ -223,6 +320,10 @@ p
|
||||||
| float32 vectors, #[code vectors.300.d.bin] for 300d float64 (double)
|
| float32 vectors, #[code vectors.300.d.bin] for 300d float64 (double)
|
||||||
| vectors, etc. By default GloVe outputs 64-bit vectors.
|
| vectors, etc. By default GloVe outputs 64-bit vectors.
|
||||||
|
|
||||||
|
+aside-code("Example").
|
||||||
|
vectors = Vectors()
|
||||||
|
vectors.from_glove('/path/to/glove_vectors')
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
+table(["Name", "Type", "Description"])
|
||||||
+row
|
+row
|
||||||
+cell #[code path]
|
+cell #[code path]
|
||||||
|
@ -323,7 +424,7 @@ p Load state from a binary string.
|
||||||
+table(["Name", "Type", "Description"])
|
+table(["Name", "Type", "Description"])
|
||||||
+row
|
+row
|
||||||
+cell #[code data]
|
+cell #[code data]
|
||||||
+cell #[code numpy.ndarray] / #[code cupy.ndarray]
|
+cell #[code.u-break ndarray[ndim=1, dtype='float32']]
|
||||||
+cell
|
+cell
|
||||||
| Stored vectors data. #[code numpy] is used for CPU vectors,
|
| Stored vectors data. #[code numpy] is used for CPU vectors,
|
||||||
| #[code cupy] for GPU vectors.
|
| #[code cupy] for GPU vectors.
|
||||||
|
@ -337,7 +438,7 @@ p Load state from a binary string.
|
||||||
|
|
||||||
+row
|
+row
|
||||||
+cell #[code keys]
|
+cell #[code keys]
|
||||||
+cell #[code numpy.ndarray]
|
+cell #[code.u-break ndarray[ndim=1, dtype='float32']]
|
||||||
+cell
|
+cell
|
||||||
| Array keeping the keys in order, such that
|
| Array keeping the keys in order, such that
|
||||||
| #[code keys[vectors.key2row[key]] == key]
|
| #[code keys[vectors.key2row[key]] == key]
|
||||||
|
|
Loading…
Reference in New Issue
Block a user