mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-03 22:06:37 +03:00
240 lines
5.9 KiB
Plaintext
240 lines
5.9 KiB
Plaintext
//- 💫 DOCS > API > STRINGSTORE
|
|
|
|
include ../_includes/_mixins
|
|
|
|
p
|
|
| Look up strings by 64-bit hashes. As of v2.0, spaCy uses hash values
|
|
| instead of integer IDs. This ensures that strings always map to the
|
|
| same ID, even from different #[code StringStores].
|
|
|
|
+h(2, "init") StringStore.__init__
|
|
+tag method
|
|
|
|
p
|
|
| Create the #[code StringStore].
|
|
|
|
+aside-code("Example").
|
|
from spacy.strings import StringStore
|
|
stringstore = StringStore([u'apple', u'orange'])
|
|
|
|
+table(["Name", "Type", "Description"])
|
|
+row
|
|
+cell #[code strings]
|
|
+cell iterable
|
|
+cell A sequence of unicode strings to add to the store.
|
|
|
|
+row("foot")
|
|
+cell returns
|
|
+cell #[code StringStore]
|
|
+cell The newly constructed object.
|
|
|
|
+h(2, "len") StringStore.__len__
|
|
+tag method
|
|
|
|
p Get the number of strings in the store.
|
|
|
|
+aside-code("Example").
|
|
stringstore = StringStore([u'apple', u'orange'])
|
|
assert len(stringstore) == 2
|
|
|
|
+table(["Name", "Type", "Description"])
|
|
+row("foot")
|
|
+cell returns
|
|
+cell int
|
|
+cell The number of strings in the store.
|
|
|
|
+h(2, "getitem") StringStore.__getitem__
|
|
+tag method
|
|
|
|
p Retrieve a string from a given hash, or vice versa.
|
|
|
|
+aside-code("Example").
|
|
stringstore = StringStore([u'apple', u'orange'])
|
|
apple_hash = stringstore[u'apple']
|
|
assert apple_hash == 8566208034543834098
|
|
assert stringstore[apple_hash] == u'apple'
|
|
|
|
+table(["Name", "Type", "Description"])
|
|
+row
|
|
+cell #[code string_or_id]
|
|
+cell bytes, unicode or uint64
|
|
+cell The value to encode.
|
|
|
|
+row("foot")
|
|
+cell returns
|
|
+cell unicode or int
|
|
+cell The value to be retrieved.
|
|
|
|
+h(2, "contains") StringStore.__contains__
|
|
+tag method
|
|
|
|
p Check whether a string is in the store.
|
|
|
|
+aside-code("Example").
|
|
stringstore = StringStore([u'apple', u'orange'])
|
|
assert u'apple' in stringstore
|
|
assert u'cherry' not in stringstore
|
|
|
|
+table(["Name", "Type", "Description"])
|
|
+row
|
|
+cell #[code string]
|
|
+cell unicode
|
|
+cell The string to check.
|
|
|
|
+row("foot")
|
|
+cell returns
|
|
+cell bool
|
|
+cell Whether the store contains the string.
|
|
|
|
+h(2, "iter") StringStore.__iter__
|
|
+tag method
|
|
|
|
p
|
|
| Iterate over the strings in the store, in order. Note that a newly
|
|
| initialised store will always include an empty string #[code ''] at
|
|
| position #[code 0].
|
|
|
|
+aside-code("Example").
|
|
stringstore = StringStore([u'apple', u'orange'])
|
|
all_strings = [s for s in stringstore]
|
|
assert all_strings == [u'apple', u'orange']
|
|
|
|
+table(["Name", "Type", "Description"])
|
|
+row("foot")
|
|
+cell yields
|
|
+cell unicode
|
|
+cell A string in the store.
|
|
|
|
+h(2, "add") StringStore.add
|
|
+tag method
|
|
+tag-new(2)
|
|
|
|
p Add a string to the #[code StringStore].
|
|
|
|
+aside-code("Example").
|
|
stringstore = StringStore([u'apple', u'orange'])
|
|
banana_hash = stringstore.add(u'banana')
|
|
assert len(stringstore) == 3
|
|
assert banana_hash == 2525716904149915114
|
|
assert stringstore[banana_hash] == u'banana'
|
|
assert stringstore[u'banana'] == banana_hash
|
|
|
|
+table(["Name", "Type", "Description"])
|
|
+row
|
|
+cell #[code string]
|
|
+cell unicode
|
|
+cell The string to add.
|
|
|
|
+row("foot")
|
|
+cell returns
|
|
+cell uint64
|
|
+cell The string's hash value.
|
|
|
|
|
|
+h(2, "to_disk") StringStore.to_disk
|
|
+tag method
|
|
+tag-new(2)
|
|
|
|
p Save the current state to a directory.
|
|
|
|
+aside-code("Example").
|
|
stringstore.to_disk('/path/to/strings')
|
|
|
|
+table(["Name", "Type", "Description"])
|
|
+row
|
|
+cell #[code path]
|
|
+cell unicode or #[code Path]
|
|
+cell
|
|
| A path to a directory, which will be created if it doesn't exist.
|
|
| Paths may be either strings or #[code Path]-like objects.
|
|
|
|
+h(2, "from_disk") StringStore.from_disk
|
|
+tag method
|
|
+tag-new(2)
|
|
|
|
p Load state from a directory. Modifies the object in place and returns it.
|
|
|
|
+aside-code("Example").
|
|
from spacy.strings import StringStore
|
|
stringstore = StringStore().from_disk('/path/to/strings')
|
|
|
|
+table(["Name", "Type", "Description"])
|
|
+row
|
|
+cell #[code path]
|
|
+cell unicode or #[code Path]
|
|
+cell
|
|
| A path to a directory. Paths may be either strings or
|
|
| #[code Path]-like objects.
|
|
|
|
+row("foot")
|
|
+cell returns
|
|
+cell #[code StringStore]
|
|
+cell The modified #[code StringStore] object.
|
|
|
|
+h(2, "to_bytes") StringStore.to_bytes
|
|
+tag method
|
|
|
|
p Serialize the current state to a binary string.
|
|
|
|
+aside-code("Example").
|
|
store_bytes = stringstore.to_bytes()
|
|
|
|
+table(["Name", "Type", "Description"])
|
|
+row
|
|
+cell #[code **exclude]
|
|
+cell -
|
|
+cell Named attributes to prevent from being serialized.
|
|
|
|
+row("foot")
|
|
+cell returns
|
|
+cell bytes
|
|
+cell The serialized form of the #[code StringStore] object.
|
|
|
|
+h(2, "from_bytes") StringStore.from_bytes
|
|
+tag method
|
|
|
|
p Load state from a binary string.
|
|
|
|
+aside-code("Example").
|
|
from spacy.strings import StringStore
|
|
store_bytes = stringstore.to_bytes()
|
|
new_store = StringStore().from_bytes(store_bytes)
|
|
|
|
+table(["Name", "Type", "Description"])
|
|
+row
|
|
+cell #[code bytes_data]
|
|
+cell bytes
|
|
+cell The data to load from.
|
|
|
|
+row
|
|
+cell #[code **exclude]
|
|
+cell -
|
|
+cell Named attributes to prevent from being loaded.
|
|
|
|
+row("foot")
|
|
+cell returns
|
|
+cell #[code StringStore]
|
|
+cell The #[code StringStore] object.
|
|
|
|
+h(2, "util") Utilities
|
|
|
|
+h(3, "hash_string") strings.hash_string
|
|
+tag function
|
|
|
|
p Get a 64-bit hash for a given string.
|
|
|
|
+aside-code("Example").
|
|
from spacy.strings import hash_string
|
|
assert hash_string(u'apple') == 8566208034543834098
|
|
|
|
+table(["Name", "Type", "Description"])
|
|
+row
|
|
+cell #[code string]
|
|
+cell unicode
|
|
+cell The string to hash.
|
|
|
|
+row("foot")
|
|
+cell returns
|
|
+cell uint64
|
|
+cell The hash.
|