spaCy/website/docs/api/stringstore.jade
2017-05-29 01:09:52 +02:00

240 lines
5.9 KiB
Plaintext

//- 💫 DOCS > API > STRINGSTORE
include ../../_includes/_mixins
p
| Look up strings by 64-bit hashes. As of v2.0, spaCy uses hash values
| instead of integer IDs. This ensures that strings always map to the
| same ID, even from different #[code StringStores].
+h(2, "init") StringStore.__init__
+tag method
p
| Create the #[code StringStore].
+aside-code("Example").
from spacy.strings import StringStore
stringstore = StringStore([u'apple', u'orange'])
+table(["Name", "Type", "Description"])
+row
+cell #[code strings]
+cell iterable
+cell A sequence of unicode strings to add to the store.
+footrow
+cell returns
+cell #[code StringStore]
+cell The newly constructed object.
+h(2, "len") StringStore.__len__
+tag method
p Get the number of strings in the store.
+aside-code("Example").
stringstore = StringStore([u'apple', u'orange'])
assert len(stringstore) == 2
+table(["Name", "Type", "Description"])
+footrow
+cell returns
+cell int
+cell The number of strings in the store.
+h(2, "getitem") StringStore.__getitem__
+tag method
p Retrieve a string from a given hash, or vice versa.
+aside-code("Example").
stringstore = StringStore([u'apple', u'orange'])
apple_hash = stringstore[u'apple']
assert apple_hash == 8566208034543834098
assert stringstore[apple_hash] == u'apple'
+table(["Name", "Type", "Description"])
+row
+cell #[code string_or_id]
+cell bytes, unicode or uint64
+cell The value to encode.
+footrow
+cell returns
+cell unicode or int
+cell The value to be retrieved.
+h(2, "contains") StringStore.__contains__
+tag method
p Check whether a string is in the store.
+aside-code("Example").
stringstore = StringStore([u'apple', u'orange'])
assert u'apple' in stringstore
assert not u'cherry' in stringstore
+table(["Name", "Type", "Description"])
+row
+cell #[code string]
+cell unicode
+cell The string to check.
+footrow
+cell returns
+cell bool
+cell Whether the store contains the string.
+h(2, "iter") StringStore.__iter__
+tag method
p
| Iterate over the strings in the store, in order. Note that a newly
| initialised store will always include an empty string #[code ''] at
| position #[code 0].
+aside-code("Example").
stringstore = StringStore([u'apple', u'orange'])
all_strings = [s for s in stringstore]
assert all_strings == [u'apple', u'orange']
+table(["Name", "Type", "Description"])
+footrow
+cell yields
+cell unicode
+cell A string in the store.
+h(2, "add") StringStore.add
+tag method
+tag-new(2)
p Add a string to the #[code StringStore].
+aside-code("Example").
stringstore = StringStore([u'apple', u'orange'])
banana_hash = stringstore.add(u'banana')
assert len(stringstore) == 3
assert banana_hash == 2525716904149915114
assert stringstore[banana_hash] == u'banana'
assert stringstore[u'banana'] == banana_hash
+table(["Name", "Type", "Description"])
+row
+cell #[code string]
+cell unicode
+cell The string to add.
+footrow
+cell returns
+cell uint64
+cell The string's hash value.
+h(2, "to_disk") StringStore.to_disk
+tag method
+tag-new(2)
p Save the current state to a directory.
+aside-code("Example").
stringstore.to_disk('/path/to/strings')
+table(["Name", "Type", "Description"])
+row
+cell #[code path]
+cell unicode or #[code Path]
+cell
| A path to a directory, which will be created if it doesn't exist.
| Paths may be either strings or #[code Path]-like objects.
+h(2, "from_disk") StringStore.from_disk
+tag method
+tag-new(2)
p Loads state from a directory. Modifies the object in place and returns it.
+aside-code("Example").
from spacy.strings import StringStore
stringstore = StringStore().from_disk('/path/to/strings')
+table(["Name", "Type", "Description"])
+row
+cell #[code path]
+cell unicode or #[code Path]
+cell
| A path to a directory. Paths may be either strings or
| #[code Path]-like objects.
+footrow
+cell returns
+cell #[code StringStore]
+cell The modified #[code StringStore] object.
+h(2, "to_bytes") StringStore.to_bytes
+tag method
p Serialize the current state to a binary string.
+aside-code("Example").
store_bytes = stringstore.to_bytes()
+table(["Name", "Type", "Description"])
+row
+cell #[code **exclude]
+cell -
+cell Named attributes to prevent from being serialized.
+footrow
+cell returns
+cell bytes
+cell The serialized form of the #[code StringStore] object.
+h(2, "from_bytes") StringStore.from_bytes
+tag method
p Load state from a binary string.
+aside-code("Example").
fron spacy.strings import StringStore
store_bytes = stringstore.to_bytes()
new_store = StringStore().from_bytes(store_bytes)
+table(["Name", "Type", "Description"])
+row
+cell #[code bytes_data]
+cell bytes
+cell The data to load from.
+row
+cell #[code **exclude]
+cell -
+cell Named attributes to prevent from being loaded.
+footrow
+cell returns
+cell #[code StringStore]
+cell The #[code StringStore] object.
+h(2, "util") Utilities
+h(3, "hash_string") strings.hash_string
+tag function
p Get a 64-bit hash for a given string.
+aside-code("Example").
from spacy.strings import hash_string
assert hash_string(u'apple') == 8566208034543834098
+table(["Name", "Type", "Description"])
+row
+cell #[code string]
+cell unicode
+cell The string to hash.
+footrow
+cell returns
+cell uint64
+cell The hash.