spaCy/website/docs/api/stringstore.jade

187 lines
4.7 KiB
Plaintext

//- 💫 DOCS > API > STRINGSTORE
include ../../_includes/_mixins
p Map strings to and from integer IDs.
+h(2, "init") StringStore.__init__
+tag method
p
| Create the #[code StringStore]. Note that a newly initialised store will
| always include an empty string #[code ''] at position #[code 0].
+aside-code("Example").
from spacy.strings import StringStore
stringstore = StringStore([u'apple', u'orange'])
+table(["Name", "Type", "Description"])
+row
+cell #[code strings]
+cell iterable
+cell A sequence of unicode strings to add to the store.
+footrow
+cell returns
+cell #[code StringStore]
+cell The newly constructed object.
+h(2, "len") StringStore.__len__
+tag method
p Get the number of strings in the store.
+aside-code("Example").
stringstore = StringStore([u'apple', u'orange'])
assert len(stringstore) == 2
+table(["Name", "Type", "Description"])
+footrow
+cell returns
+cell int
+cell The number of strings in the store.
+h(2, "getitem") StringStore.__getitem__
+tag method
p Retrieve a string from a given integer ID, or vice versa.
+aside-code("Example").
stringstore = StringStore([u'apple', u'orange'])
int_id = stringstore[u'apple'] # 1
assert stringstore[int_id] == u'apple'
+table(["Name", "Type", "Description"])
+row
+cell #[code string_or_id]
+cell bytes, unicode or int
+cell The value to encode.
+footrow
+cell returns
+cell unicode or int
+cell The value to be retrieved.
+h(2, "contains") StringStore.__contains__
+tag method
p Check whether a string is in the store.
+aside-code("Example").
stringstore = StringStore([u'apple', u'orange'])
assert u'apple' in stringstore
assert u'cherry' not in stringstore
+table(["Name", "Type", "Description"])
+row
+cell #[code string]
+cell unicode
+cell The string to check.
+footrow
+cell returns
+cell bool
+cell Whether the store contains the string.
+h(2, "iter") StringStore.__iter__
+tag method
p
| Iterate over the strings in the store, in order. Note that a newly
| initialised store will always include an empty string #[code ''] at
| position #[code 0].
+aside-code("Example").
stringstore = StringStore([u'apple', u'orange'])
all_strings = [s for s in stringstore]
assert all_strings == [u'', u'apple', u'orange']
+table(["Name", "Type", "Description"])
+footrow
+cell yields
+cell unicode
+cell A string in the store.
+h(2, "to_disk") StringStore.to_disk
+tag method
p Save the current state to a directory.
+aside-code("Example").
stringstore.to_disk('/path/to/strings')
+table(["Name", "Type", "Description"])
+row
+cell #[code path]
+cell unicode or #[code Path]
+cell
| A path to a directory, which will be created if it doesn't exist.
| Paths may be either strings or #[code Path]-like objects.
+h(2, "from_disk") StringStore.from_disk
+tag method
p Loads state from a directory. Modifies the object in place and returns it.
+aside-code("Example").
from spacy.strings import StringStore
stringstore = StringStore().from_disk('/path/to/strings')
+table(["Name", "Type", "Description"])
+row
+cell #[code path]
+cell unicode or #[code Path]
+cell
| A path to a directory. Paths may be either strings or
| #[code Path]-like objects.
+footrow
+cell returns
+cell #[code StringStore]
+cell The modified #[code StringStore] object.
+h(2, "to_bytes") StringStore.to_bytes
+tag method
p Serialize the current state to a binary string.
+aside-code("Example").
store_bytes = stringstore.to_bytes()
+table(["Name", "Type", "Description"])
+row
+cell #[code **exclude]
+cell -
+cell Named attributes to prevent from being serialized.
+footrow
+cell returns
+cell bytes
+cell The serialized form of the #[code StringStore] object.
+h(2, "from_bytes") StringStore.from_bytes
+tag method
p Load state from a binary string.
+aside-code("Example").
from spacy.strings import StringStore
store_bytes = stringstore.to_bytes()
new_store = StringStore().from_bytes(store_bytes)
+table(["Name", "Type", "Description"])
+row
+cell #[code bytes_data]
+cell bytes
+cell The data to load from.
+row
+cell #[code **exclude]
+cell -
+cell Named attributes to prevent from being loaded.
+footrow
+cell returns
+cell #[code StringStore]
+cell The #[code StringStore] object.