//- 💫 DOCS > API > VECTORS

include ../_includes/_mixins

p
    |  Vectors data is kept in the #[code Vectors.data] attribute, which should
    |  be an instance of #[code numpy.ndarray] (for CPU vectors) or
    |  #[code cupy.ndarray] (for GPU vectors). Multiple keys can be mapped to
    |  the same vector, and not all of the rows in the table need to be
    |  assigned – so #[code vectors.n_keys] may be greater or smaller than
    |  #[code vectors.shape[0]].

+h(2, "init") Vectors.__init__
    +tag method

p
    |  Create a new vector store. You can set the vector values and keys
    |  directly on initialisation, or supply a #[code shape] keyword argument
    |  to create an empty table you can add vectors to later.

+aside-code("Example").
    from spacy.vectors import Vectors

    empty_vectors = Vectors(shape=(10000, 300))

    data = numpy.zeros((3, 300), dtype='f')
    keys = [u'cat', u'dog', u'rat']
    vectors = Vectors(data=data, keys=keys)

+table(["Name", "Type", "Description"])
    +row
        +cell #[code data]
        +cell #[code.u-break ndarray[ndim=2, dtype='float32']]
        +cell The vector data.

    +row
        +cell #[code keys]
        +cell iterable
        +cell A sequence of keys aligned with the data.

    +row
        +cell #[code shape]
        +cell tuple
        +cell
            |  Size of the table as #[code (n_entries, n_columns)], the number
            |  of entries and number of columns. Not required if you're
            |  initialising the object with #[code data] and #[code keys].

    +row("foot")
        +cell returns
        +cell #[code Vectors]
        +cell The newly created object.

+h(2, "getitem") Vectors.__getitem__
    +tag method

p
    |  Get a vector by key. If the key is not found in the table, a
    |  #[code KeyError] is raised.

+aside-code("Example").
    cat_id = nlp.vocab.strings[u'cat']
    cat_vector = nlp.vocab.vectors[cat_id]
    assert numpy.array_equal(cat_vector, nlp.vocab[u'cat'].vector)

+table(["Name", "Type", "Description"])
    +row
        +cell #[code key]
        +cell int
        +cell The key to get the vector for.

    +row("foot")
        +cell returns
        +cell #[code.u-break ndarray[ndim=1, dtype='float32']]
        +cell The vector for the key.

+h(2, "setitem") Vectors.__setitem__
    +tag method

p
    |  Set a vector for the given key.

+aside-code("Example").
    cat_id = nlp.vocab.strings[u'cat']
    vector = numpy.random.uniform(-1, 1, (300,))
    nlp.vocab.vectors[cat_id] = vector

+table(["Name", "Type", "Description"])
    +row
        +cell #[code key]
        +cell int
        +cell The key to set the vector for.

    +row
        +cell #[code vector]
        +cell #[code.u-break ndarray[ndim=1, dtype='float32']]
        +cell The vector to set.

+h(2, "iter") Vectors.__iter__
    +tag method

p Iterate over the keys in the table.

+aside-code("Example").
    for key in nlp.vocab.vectors:
        print(key, nlp.vocab.strings[key])

+table(["Name", "Type", "Description"])
    +row("foot")
        +cell yields
        +cell int
        +cell A key in the table.

+h(2, "len") Vectors.__len__
    +tag method

p Return the number of vectors in the table.

+aside-code("Example").
    vectors = Vectors(shape=(3, 300))
    assert len(vectors) == 3

+table(["Name", "Type", "Description"])
    +row("foot")
        +cell returns
        +cell int
        +cell The number of vectors in the table.

+h(2, "contains") Vectors.__contains__
    +tag method

p
    |  Check whether a key has been mapped to a vector entry in the table.

+aside-code("Example").
    cat_id = nlp.vocab.strings[u'cat']
    nlp.vocab.vectors.add(cat_id, numpy.random.uniform(-1, 1, (300,)))
    assert cat_id in nlp.vocab.vectors

+table(["Name", "Type", "Description"])
    +row
        +cell #[code key]
        +cell int
        +cell The key to check.

    +row("foot")
        +cell returns
        +cell bool
        +cell Whether the key has a vector entry.

+h(2, "add") Vectors.add
    +tag method

p
    |  Add a key to the table, optionally setting a vector value as well. Keys
    |  can be mapped to an existing vector by setting #[code row], or a new
    |  vector can be added. When adding unicode keys, keep in mind that the
    |  #[code Vectors] class itself has no
    |  #[+api("stringstore") #[code StringStore]], so you have to store the
    |  hash-to-string mapping separately. If you need to manage the strings,
    |  you should use the #[code Vectors] via the
    |  #[+api("vocab") #[code Vocab]] class, e.g. #[code vocab.vectors].

+aside-code("Example").
    vector = numpy.random.uniform(-1, 1, (300,))
    cat_id = nlp.vocab.strings[u'cat']
    nlp.vocab.vectors.add(cat_id, vector=vector)
    nlp.vocab.vectors.add(u'dog', row=0)

+table(["Name", "Type", "Description"])
    +row
        +cell #[code key]
        +cell unicode / int
        +cell The key to add.

    +row
        +cell #[code vector]
        +cell #[code.u-break ndarray[ndim=1, dtype='float32']]
        +cell An optional vector to add for the key.

    +row
        +cell #[code row]
        +cell int
        +cell An optional row number of a vector to map the key to.

    +row("foot")
        +cell returns
        +cell int
        +cell The row the vector was added to.

+h(2, "resize") Vectors.resize
    +tag method

p
    |  Resize the underlying vectors array. If #[code inplace=True], the memory
    |  is reallocated. This may cause other references to the data to become
    |  invalid, so only use #[code inplace=True] if you're sure that's what you
    |  want. If the number of vectors is reduced, keys mapped to rows that have
    |  been deleted are removed. These removed items are returned as a list of
    |  #[code (key, row)] tuples.

+aside-code("Example").
    removed = nlp.vocab.vectors.resize((10000, 300))

+table(["Name", "Type", "Description"])
    +row
        +cell #[code shape]
        +cell tuple
        +cell
            |  A #[code (rows, dims)] tuple describing the number of rows and
            |  dimensions.

    +row
        +cell #[code inplace]
        +cell bool
        +cell Reallocate the memory.

    +row("foot")
        +cell returns
        +cell list
        +cell The removed items as a list of #[code (key, row)] tuples.

+h(2, "keys") Vectors.keys
    +tag method

p A sequence of the keys in the table.

+aside-code("Example").
    for key in nlp.vocab.vectors.keys():
        print(key, nlp.vocab.strings[key])

+table(["Name", "Type", "Description"])
    +row("foot")
        +cell returns
        +cell iterable
        +cell The keys.

+h(2, "values") Vectors.values
    +tag method

p
    |  Iterate over vectors that have been assigned to at least one key. Note
    |  that some vectors may be unassigned, so the number of vectors returned
    |  may be less than the length of the vectors table.

+aside-code("Example").
    for vector in nlp.vocab.vectors.values():
        print(vector)

+table(["Name", "Type", "Description"])
    +row("foot")
        +cell yields
        +cell #[code.u-break ndarray[ndim=1, dtype='float32']]
        +cell A vector in the table.

+h(2, "items") Vectors.items
    +tag method

p Iterate over #[code (key, vector)] pairs, in order.

+aside-code("Example").
    for key, vector in nlp.vocab.vectors.items():
        print(key, nlp.vocab.strings[key], vector)

+table(["Name", "Type", "Description"])
    +row("foot")
        +cell yields
        +cell tuple
        +cell #[code (key, vector)] pairs, in order.

+h(2, "shape") Vectors.shape
    +tag property

p
    |  Get a #[code (rows, dims)] tuple of the number of rows and the number
    |  of dimensions in the vector table.

+aside-code("Example").
    vectors = Vectors(shape=(1, 300))
    vectors.add(u'cat', numpy.random.uniform(-1, 1, (300,)))
    rows, dims = vectors.shape
    assert rows == 1
    assert dims == 300

+table(["Name", "Type", "Description"])
    +row("foot")
        +cell returns
        +cell tuple
        +cell A #[code (rows, dims)] pair.

+h(2, "size") Vectors.size
    +tag property

p The vector size, i.e. #[code rows * dims].

+aside-code("Example").
    vectors = Vectors(shape=(500, 300))
    assert vectors.size == 150000

+table(["Name", "Type", "Description"])
    +row("foot")
        +cell returns
        +cell int
        +cell The vector size.

+h(2, "is_full") Vectors.is_full
    +tag property

p
    |  Whether the vectors table is full and has no slots available for new
    |  keys. If a table is full, it can be resized using
    |  #[+api("vectors#resize") #[code Vectors.resize]].

+aside-code("Example").
    vectors = Vectors(shape=(1, 300))
    vectors.add(u'cat', numpy.random.uniform(-1, 1, (300,)))
    assert vectors.is_full

+table(["Name", "Type", "Description"])
    +row("foot")
        +cell returns
        +cell bool
        +cell Whether the vectors table is full.

+h(2, "n_keys") Vectors.n_keys
    +tag property

p
    |  Get the number of keys in the table. Note that this is the number of
    |  #[em all] keys, not just unique vectors. If several keys are mapped
    |  to the same vectors, they will be counted individually.

+aside-code("Example").
    vectors = Vectors(shape=(10, 300))
    assert len(vectors) == 10
    assert vectors.n_keys == 0

+table(["Name", "Type", "Description"])
    +row("foot")
        +cell returns
        +cell int
        +cell The number of all keys in the table.

+h(2, "from_glove") Vectors.from_glove
    +tag method

p
    |  Load #[+a("https://nlp.stanford.edu/projects/glove/") GloVe] vectors from
    |  a directory. Assumes binary format, that the vocab is in a
    |  #[code vocab.txt], and that vectors are named
    |  #[code vectors.{size}.[fd].bin], e.g. #[code vectors.128.f.bin] for 128d
    |  float32 vectors, #[code vectors.300.d.bin] for 300d float64 (double)
    |  vectors, etc. By default GloVe outputs 64-bit vectors.

+aside-code("Example").
    vectors = Vectors()
    vectors.from_glove('/path/to/glove_vectors')

+table(["Name", "Type", "Description"])
    +row
        +cell #[code path]
        +cell unicode / #[code Path]
        +cell The path to load the GloVe vectors from.

+h(2, "to_disk") Vectors.to_disk
    +tag method

p Save the current state to a directory.

+aside-code("Example").
    vectors.to_disk('/path/to/vectors')

+table(["Name", "Type", "Description"])
    +row
        +cell #[code path]
        +cell unicode / #[code Path]
        +cell
            |  A path to a directory, which will be created if it doesn't exist.
            |  Paths may be either strings or #[code Path]-like objects.

    +row
        +cell #[code **exclude]
        +cell -
        +cell Named attributes to prevent from being saved.

+h(2, "from_disk") Vectors.from_disk
    +tag method

p Loads state from a directory. Modifies the object in place and returns it.

+aside-code("Example").
    vectors = Vectors()
    vectors.from_disk('/path/to/vectors')

+table(["Name", "Type", "Description"])
    +row
        +cell #[code path]
        +cell unicode / #[code Path]
        +cell
            |  A path to a directory. Paths may be either strings or
            |  #[code Path]-like objects.

    +row("foot")
        +cell returns
        +cell #[code Vectors]
        +cell The modified #[code Vectors] object.

+h(2, "to_bytes") Vectors.to_bytes
    +tag method

p Serialize the current state to a binary string.

+aside-code("Example").
    vectors_bytes = vectors.to_bytes()

+table(["Name", "Type", "Description"])
    +row
        +cell #[code **exclude]
        +cell -
        +cell Named attributes to prevent from being serialized.

    +row("foot")
        +cell returns
        +cell bytes
        +cell The serialized form of the #[code Vectors] object.

+h(2, "from_bytes") Vectors.from_bytes
    +tag method

p Load state from a binary string.

+aside-code("Example").
    from spacy.vectors import Vectors
    vectors_bytes = vectors.to_bytes()
    new_vectors = Vectors()
    new_vectors.from_bytes(vectors_bytes)

+table(["Name", "Type", "Description"])
    +row
        +cell #[code data]
        +cell bytes
        +cell The data to load from.

    +row
        +cell #[code **exclude]
        +cell -
        +cell Named attributes to prevent from being loaded.

    +row("foot")
        +cell returns
        +cell #[code Vectors]
        +cell The #[code Vectors] object.

+h(2, "attributes") Attributes

+table(["Name", "Type", "Description"])
    +row
        +cell #[code data]
        +cell #[code.u-break ndarray[ndim=2, dtype='float32']]
        +cell
            |  Stored vectors data. #[code numpy] is used for CPU vectors,
            |  #[code cupy] for GPU vectors.

    +row
        +cell #[code key2row]
        +cell dict
        +cell
            |  Dictionary mapping word hashes to rows in the
            |  #[code Vectors.data] table.

    +row
        +cell #[code keys]
        +cell #[code.u-break ndarray[ndim=1, dtype='float32']]
        +cell
            |  Array keeping the keys in order, such that
            |  #[code keys[vectors.key2row[key]] == key]