//- 💫 DOCS > API > STRINGSTORE include ../../_includes/_mixins p | Look up strings by 64-bit hashes. As of v2.0, spaCy uses hash values | instead of integer IDs. This ensures that strings always map to the | same ID, even from different #[code StringStores]. +h(2, "init") StringStore.__init__ +tag method p | Create the #[code StringStore]. +aside-code("Example"). from spacy.strings import StringStore stringstore = StringStore([u'apple', u'orange']) +table(["Name", "Type", "Description"]) +row +cell #[code strings] +cell iterable +cell A sequence of unicode strings to add to the store. +footrow +cell returns +cell #[code StringStore] +cell The newly constructed object. +h(2, "len") StringStore.__len__ +tag method p Get the number of strings in the store. +aside-code("Example"). stringstore = StringStore([u'apple', u'orange']) assert len(stringstore) == 2 +table(["Name", "Type", "Description"]) +footrow +cell returns +cell int +cell The number of strings in the store. +h(2, "getitem") StringStore.__getitem__ +tag method p Retrieve a string from a given hash, or vice versa. +aside-code("Example"). stringstore = StringStore([u'apple', u'orange']) apple_hash = stringstore[u'apple'] assert apple_hash == 8566208034543834098 assert stringstore[apple_hash] == u'apple' +table(["Name", "Type", "Description"]) +row +cell #[code string_or_id] +cell bytes, unicode or uint64 +cell The value to encode. +footrow +cell returns +cell unicode or int +cell The value to be retrieved. +h(2, "contains") StringStore.__contains__ +tag method p Check whether a string is in the store. +aside-code("Example"). stringstore = StringStore([u'apple', u'orange']) assert u'apple' in stringstore assert not u'cherry' in stringstore +table(["Name", "Type", "Description"]) +row +cell #[code string] +cell unicode +cell The string to check. +footrow +cell returns +cell bool +cell Whether the store contains the string. 
+h(2, "iter") StringStore.__iter__ +tag method p | Iterate over the strings in the store, in order. Note that a newly | initialised store will always include an empty string #[code ''] at | position #[code 0]. +aside-code("Example"). stringstore = StringStore([u'apple', u'orange']) all_strings = [s for s in stringstore] assert all_strings == [u'apple', u'orange'] +table(["Name", "Type", "Description"]) +footrow +cell yields +cell unicode +cell A string in the store. +h(2, "add") StringStore.add +tag method +tag-new(2) p Add a string to the #[code StringStore]. +aside-code("Example"). stringstore = StringStore([u'apple', u'orange']) banana_hash = stringstore.add(u'banana') assert len(stringstore) == 3 assert banana_hash == 2525716904149915114 assert stringstore[banana_hash] == u'banana' assert stringstore[u'banana'] == banana_hash +table(["Name", "Type", "Description"]) +row +cell #[code string] +cell unicode +cell The string to add. +footrow +cell returns +cell uint64 +cell The string's hash value. +h(2, "to_disk") StringStore.to_disk +tag method +tag-new(2) p Save the current state to a directory. +aside-code("Example"). stringstore.to_disk('/path/to/strings') +table(["Name", "Type", "Description"]) +row +cell #[code path] +cell unicode or #[code Path] +cell | A path to a directory, which will be created if it doesn't exist. | Paths may be either strings or #[code Path]-like objects. +h(2, "from_disk") StringStore.from_disk +tag method +tag-new(2) p Load state from a directory. Modifies the object in place and returns it. +aside-code("Example"). from spacy.strings import StringStore stringstore = StringStore().from_disk('/path/to/strings') +table(["Name", "Type", "Description"]) +row +cell #[code path] +cell unicode or #[code Path] +cell | A path to a directory. Paths may be either strings or | #[code Path]-like objects. +footrow +cell returns +cell #[code StringStore] +cell The modified #[code StringStore] object. 
+h(2, "to_bytes") StringStore.to_bytes +tag method p Serialize the current state to a binary string. +aside-code("Example"). store_bytes = stringstore.to_bytes() +table(["Name", "Type", "Description"]) +row +cell #[code **exclude] +cell - +cell Named attributes to prevent from being serialized. +footrow +cell returns +cell bytes +cell The serialized form of the #[code StringStore] object. +h(2, "from_bytes") StringStore.from_bytes +tag method p Load state from a binary string. +aside-code("Example"). from spacy.strings import StringStore store_bytes = stringstore.to_bytes() new_store = StringStore().from_bytes(store_bytes) +table(["Name", "Type", "Description"]) +row +cell #[code bytes_data] +cell bytes +cell The data to load from. +row +cell #[code **exclude] +cell - +cell Named attributes to prevent from being loaded. +footrow +cell returns +cell #[code StringStore] +cell The #[code StringStore] object. +h(2, "util") Utilities +h(3, "hash_string") strings.hash_string +tag function p Get a 64-bit hash for a given string. +aside-code("Example"). from spacy.strings import hash_string assert hash_string(u'apple') == 8566208034543834098 +table(["Name", "Type", "Description"]) +row +cell #[code string] +cell unicode +cell The string to hash. +footrow +cell returns +cell uint64 +cell The hash.