//- 💫 DOCS > API > VECTORS include ../_includes/_mixins p | Vectors data is kept in the #[code Vectors.data] attribute, which should | be an instance of #[code numpy.ndarray] (for CPU vectors) or | #[code cupy.ndarray] (for GPU vectors). Multiple keys can be mapped to | the same vector, and not all of the rows in the table need to be | assigned – so #[code vectors.n_keys] may be greater or smaller than | #[code vectors.shape[0]]. +h(2, "init") Vectors.__init__ +tag method p | Create a new vector store. You can set the vector values and keys | directly on initialisation, or supply a #[code shape] keyword argument | to create an empty table you can add vectors to later. +aside-code("Example"). from spacy.vectors import Vectors empty_vectors = Vectors(shape=(10000, 300)) data = numpy.zeros((3, 300), dtype='f') keys = [u'cat', u'dog', u'rat'] vectors = Vectors(data=data, keys=keys) +table(["Name", "Type", "Description"]) +row +cell #[code data] +cell #[code.u-break ndarray[ndim=1, dtype='float32']] +cell The vector data. +row +cell #[code keys] +cell iterable +cell A sequence of keys aligned with the data. +row +cell #[code shape] +cell tuple +cell | Size of the table as #[code (n_entries, n_columns)], the number | of entries and number of columns. Not required if you're | initialising the object with #[code data] and #[code keys]. +row("foot") +cell returns +cell #[code Vectors] +cell The newly created object. +h(2, "getitem") Vectors.__getitem__ +tag method p | Get a vector by key. If the key is not found in the table, a | #[code KeyError] is raised. +aside-code("Example"). cat_id = nlp.vocab.strings[u'cat'] cat_vector = nlp.vocab.vectors[cat_id] assert (cat_vector == nlp.vocab[u'cat'].vector).all() +table(["Name", "Type", "Description"]) +row +cell #[code key] +cell int +cell The key to get the vector for. +row("foot") +cell returns +cell #[code.u-break ndarray[ndim=1, dtype='float32']] +cell The vector for the key. 
+h(2, "setitem") Vectors.__setitem__ +tag method p | Set a vector for the given key. +aside-code("Example"). cat_id = nlp.vocab.strings[u'cat'] vector = numpy.random.uniform(-1, 1, (300,)) nlp.vocab.vectors[cat_id] = vector +table(["Name", "Type", "Description"]) +row +cell #[code key] +cell int +cell The key to set the vector for. +row +cell #[code vector] +cell #[code.u-break ndarray[ndim=1, dtype='float32']] +cell The vector to set. +h(2, "iter") Vectors.__iter__ +tag method p Iterate over the keys in the table. +aside-code("Example"). for key in nlp.vocab.vectors: print(key, nlp.vocab.strings[key]) +table(["Name", "Type", "Description"]) +row("foot") +cell yields +cell int +cell A key in the table. +h(2, "len") Vectors.__len__ +tag method p Return the number of vectors in the table. +aside-code("Example"). vectors = Vectors(shape=(3, 300)) assert len(vectors) == 3 +table(["Name", "Type", "Description"]) +row("foot") +cell returns +cell int +cell The number of vectors in the table. +h(2, "contains") Vectors.__contains__ +tag method p | Check whether a key has been mapped to a vector entry in the table. +aside-code("Example"). cat_id = nlp.vocab.strings[u'cat'] nlp.vocab.vectors.add(cat_id, vector=numpy.random.uniform(-1, 1, (300,))) assert cat_id in nlp.vocab.vectors +table(["Name", "Type", "Description"]) +row +cell #[code key] +cell int +cell The key to check. +row("foot") +cell returns +cell bool +cell Whether the key has a vector entry. +h(2, "add") Vectors.add +tag method p | Add a key to the table, optionally setting a vector value as well. Keys | can be mapped to an existing vector by setting #[code row], or a new | vector can be added. When adding unicode keys, keep in mind that the | #[code Vectors] class itself has no | #[+api("stringstore") #[code StringStore]], so you have to store the | hash-to-string mapping separately. If you need to manage the strings, | you should use the #[code Vectors] via the | #[+api("vocab") #[code Vocab]] class, e.g. #[code vocab.vectors]. 
+aside-code("Example"). vector = numpy.random.uniform(-1, 1, (300,)) cat_id = nlp.vocab.strings[u'cat'] nlp.vocab.vectors.add(cat_id, vector=vector) nlp.vocab.vectors.add(u'dog', row=0) +table(["Name", "Type", "Description"]) +row +cell #[code key] +cell unicode / int +cell The key to add. +row +cell #[code vector] +cell #[code.u-break ndarray[ndim=1, dtype='float32']] +cell An optional vector to add for the key. +row +cell #[code row] +cell int +cell An optional row number of a vector to map the key to. +row("foot") +cell returns +cell int +cell The row the vector was added to. +h(2, "resize") Vectors.resize +tag method p | Resize the underlying vectors array. If #[code inplace=True], the memory | is reallocated. This may cause other references to the data to become | invalid, so only use #[code inplace=True] if you're sure that's what you | want. If the number of vectors is reduced, keys mapped to rows that have | been deleted are removed. These removed items are returned as a list of | #[code (key, row)] tuples. +aside-code("Example"). removed = nlp.vocab.vectors.resize((10000, 300)) +table(["Name", "Type", "Description"]) +row +cell #[code shape] +cell tuple +cell | A #[code (rows, dims)] tuple describing the number of rows and | dimensions. +row +cell #[code inplace] +cell bool +cell Reallocate the memory. +row("foot") +cell returns +cell list +cell The removed items as a list of #[code (key, row)] tuples. +h(2, "keys") Vectors.keys +tag method p A sequence of the keys in the table. +aside-code("Example"). for key in nlp.vocab.vectors.keys(): print(key, nlp.vocab.strings[key]) +table(["Name", "Type", "Description"]) +row("foot") +cell returns +cell iterable +cell The keys. +h(2, "values") Vectors.values +tag method p | Iterate over vectors that have been assigned to at least one key. Note | that some vectors may be unassigned, so the number of vectors returned | may be less than the length of the vectors table. +aside-code("Example"). 
for vector in nlp.vocab.vectors.values(): print(vector) +table(["Name", "Type", "Description"]) +row("foot") +cell yields +cell #[code.u-break ndarray[ndim=1, dtype='float32']] +cell A vector in the table. +h(2, "items") Vectors.items +tag method p Iterate over #[code (key, vector)] pairs, in order. +aside-code("Example"). for key, vector in nlp.vocab.vectors.items(): print(key, nlp.vocab.strings[key], vector) +table(["Name", "Type", "Description"]) +row("foot") +cell yields +cell tuple +cell #[code (key, vector)] pairs, in order. +h(2, "shape") Vectors.shape +tag property p | Get a #[code (rows, dims)] tuple of the number of rows and number of | dimensions in the vector table. +aside-code("Example"). vectors = Vectors(shape=(1, 300)) vectors.add(u'cat', vector=numpy.random.uniform(-1, 1, (300,))) rows, dims = vectors.shape assert rows == 1 assert dims == 300 +table(["Name", "Type", "Description"]) +row("foot") +cell returns +cell tuple +cell A #[code (rows, dims)] pair. +h(2, "size") Vectors.size +tag property p The vector size, i.e. #[code rows * dims]. +aside-code("Example"). vectors = Vectors(shape=(500, 300)) assert vectors.size == 150000 +table(["Name", "Type", "Description"]) +row("foot") +cell returns +cell int +cell The vector size. +h(2, "is_full") Vectors.is_full +tag property p | Whether the vectors table is full and has no slots available for new | keys. If a table is full, it can be resized using | #[+api("vectors#resize") #[code Vectors.resize]]. +aside-code("Example"). vectors = Vectors(shape=(1, 300)) vectors.add(u'cat', vector=numpy.random.uniform(-1, 1, (300,))) assert vectors.is_full +table(["Name", "Type", "Description"]) +row("foot") +cell returns +cell bool +cell Whether the vectors table is full. +h(2, "n_keys") Vectors.n_keys +tag property p | Get the number of keys in the table. Note that this is the number of | #[em all] keys, not just unique vectors. If several keys are mapped | to the same vector, they will be counted individually. 
+aside-code("Example"). vectors = Vectors(shape=(10, 300)) assert len(vectors) == 10 assert vectors.n_keys == 0 +table(["Name", "Type", "Description"]) +row("foot") +cell returns +cell int +cell The number of all keys in the table. +h(2, "from_glove") Vectors.from_glove +tag method p | Load #[+a("https://nlp.stanford.edu/projects/glove/") GloVe] vectors from | a directory. Assumes binary format, that the vocab is in a | #[code vocab.txt], and that vectors are named | #[code vectors.{size}.[fd].bin], e.g. #[code vectors.128.f.bin] for 128d | float32 vectors, #[code vectors.300.d.bin] for 300d float64 (double) | vectors, etc. By default GloVe outputs 64-bit vectors. +aside-code("Example"). vectors = Vectors() vectors.from_glove('/path/to/glove_vectors') +table(["Name", "Type", "Description"]) +row +cell #[code path] +cell unicode / #[code Path] +cell The path to load the GloVe vectors from. +h(2, "to_disk") Vectors.to_disk +tag method p Save the current state to a directory. +aside-code("Example"). vectors.to_disk('/path/to/vectors') +table(["Name", "Type", "Description"]) +row +cell #[code path] +cell unicode / #[code Path] +cell | A path to a directory, which will be created if it doesn't exist. | Paths may be either strings or #[code Path]-like objects. +row +cell #[code **exclude] +cell - +cell Named attributes to prevent from being saved. +h(2, "from_disk") Vectors.from_disk +tag method p Loads state from a directory. Modifies the object in place and returns it. +aside-code("Example"). vectors = Vectors() vectors.from_disk('/path/to/vectors') +table(["Name", "Type", "Description"]) +row +cell #[code path] +cell unicode / #[code Path] +cell | A path to a directory. Paths may be either strings or | #[code Path]-like objects. +row("foot") +cell returns +cell #[code Vectors] +cell The modified #[code Vectors] object. +h(2, "to_bytes") Vectors.to_bytes +tag method p Serialize the current state to a binary string. +aside-code("Example"). 
vectors_bytes = vectors.to_bytes() +table(["Name", "Type", "Description"]) +row +cell #[code **exclude] +cell - +cell Named attributes to prevent from being serialized. +row("foot") +cell returns +cell bytes +cell The serialized form of the #[code Vectors] object. +h(2, "from_bytes") Vectors.from_bytes +tag method p Load state from a binary string. +aside-code("Example"). from spacy.vectors import Vectors vectors_bytes = vectors.to_bytes() new_vectors = Vectors() new_vectors.from_bytes(vectors_bytes) +table(["Name", "Type", "Description"]) +row +cell #[code data] +cell bytes +cell The data to load from. +row +cell #[code **exclude] +cell - +cell Named attributes to prevent from being loaded. +row("foot") +cell returns +cell #[code Vectors] +cell The #[code Vectors] object. +h(2, "attributes") Attributes +table(["Name", "Type", "Description"]) +row +cell #[code data] +cell #[code.u-break ndarray[ndim=1, dtype='float32']] +cell | Stored vectors data. #[code numpy] is used for CPU vectors, | #[code cupy] for GPU vectors. +row +cell #[code key2row] +cell dict +cell | Dictionary mapping word hashes to rows in the | #[code Vectors.data] table. +row +cell #[code keys] +cell #[code.u-break ndarray[ndim=1, dtype='float32']] +cell | Array keeping the keys in order, such that | #[code keys[vectors.key2row[key]] == key]