mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-24 20:51:30 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			240 lines
		
	
	
		
			5.9 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			240 lines
		
	
	
		
			5.9 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
| //- 💫 DOCS > API > STRINGSTORE
 | |
| 
 | |
| include ../_includes/_mixins
 | |
| 
 | |
| p
 | |
|     |  Look up strings by 64-bit hashes. As of v2.0, spaCy uses hash values
 | |
|     |  instead of integer IDs. This ensures that strings always map to the
 | |
|     |  same ID, even from different #[code StringStores].
 | |
| 
 | |
| +h(2, "init") StringStore.__init__
 | |
|     +tag method
 | |
| 
 | |
| p
 | |
|     |  Create the #[code StringStore].
 | |
| 
 | |
| +aside-code("Example").
 | |
|     from spacy.strings import StringStore
 | |
|     stringstore = StringStore([u'apple', u'orange'])
 | |
| 
 | |
| +table(["Name", "Type", "Description"])
 | |
|     +row
 | |
|         +cell #[code strings]
 | |
|         +cell iterable
 | |
|         +cell A sequence of unicode strings to add to the store.
 | |
| 
 | |
|     +row("foot")
 | |
|         +cell returns
 | |
|         +cell #[code StringStore]
 | |
|         +cell The newly constructed object.
 | |
| 
 | |
| +h(2, "len") StringStore.__len__
 | |
|     +tag method
 | |
| 
 | |
| p Get the number of strings in the store.
 | |
| 
 | |
| +aside-code("Example").
 | |
|     stringstore = StringStore([u'apple', u'orange'])
 | |
|     assert len(stringstore) == 2
 | |
| 
 | |
| +table(["Name", "Type", "Description"])
 | |
|     +row("foot")
 | |
|         +cell returns
 | |
|         +cell int
 | |
|         +cell The number of strings in the store.
 | |
| 
 | |
| +h(2, "getitem") StringStore.__getitem__
 | |
|     +tag method
 | |
| 
 | |
| p Retrieve a string from a given hash, or vice versa.
 | |
| 
 | |
| +aside-code("Example").
 | |
|     stringstore = StringStore([u'apple', u'orange'])
 | |
|     apple_hash = stringstore[u'apple']
 | |
|     assert apple_hash == 8566208034543834098
 | |
|     assert stringstore[apple_hash] == u'apple'
 | |
| 
 | |
| +table(["Name", "Type", "Description"])
 | |
|     +row
 | |
|         +cell #[code string_or_id]
 | |
|         +cell bytes, unicode or uint64
 | |
|         +cell The value to encode.
 | |
| 
 | |
|     +row("foot")
 | |
|         +cell returns
 | |
|         +cell unicode or int
 | |
|         +cell The value to be retrieved.
 | |
| 
 | |
| +h(2, "contains") StringStore.__contains__
 | |
|     +tag method
 | |
| 
 | |
| p Check whether a string is in the store.
 | |
| 
 | |
| +aside-code("Example").
 | |
|     stringstore = StringStore([u'apple', u'orange'])
 | |
|     assert u'apple' in stringstore
 | |
|     assert u'cherry' not in stringstore
 | |
| 
 | |
| +table(["Name", "Type", "Description"])
 | |
|     +row
 | |
|         +cell #[code string]
 | |
|         +cell unicode
 | |
|         +cell The string to check.
 | |
| 
 | |
|     +row("foot")
 | |
|         +cell returns
 | |
|         +cell bool
 | |
|         +cell Whether the store contains the string.
 | |
| 
 | |
| +h(2, "iter") StringStore.__iter__
 | |
|     +tag method
 | |
| 
 | |
| p
 | |
|     |  Iterate over the strings in the store, in order. Note that a newly
 | |
|     |  initialised store will always include an empty string #[code ''] at
 | |
|     |  position #[code 0].
 | |
| 
 | |
| +aside-code("Example").
 | |
|     stringstore = StringStore([u'apple', u'orange'])
 | |
|     all_strings = [s for s in stringstore]
 | |
|     assert all_strings == [u'apple', u'orange']
 | |
| 
 | |
| +table(["Name", "Type", "Description"])
 | |
|     +row("foot")
 | |
|         +cell yields
 | |
|         +cell unicode
 | |
|         +cell A string in the store.
 | |
| 
 | |
| +h(2, "add") StringStore.add
 | |
|     +tag method
 | |
|     +tag-new(2)
 | |
| 
 | |
| p Add a string to the #[code StringStore].
 | |
| 
 | |
| +aside-code("Example").
 | |
|     stringstore = StringStore([u'apple', u'orange'])
 | |
|     banana_hash = stringstore.add(u'banana')
 | |
|     assert len(stringstore) == 3
 | |
|     assert banana_hash == 2525716904149915114
 | |
|     assert stringstore[banana_hash] == u'banana'
 | |
|     assert stringstore[u'banana'] == banana_hash
 | |
| 
 | |
| +table(["Name", "Type", "Description"])
 | |
|     +row
 | |
|         +cell #[code string]
 | |
|         +cell unicode
 | |
|         +cell The string to add.
 | |
| 
 | |
|     +row("foot")
 | |
|         +cell returns
 | |
|         +cell uint64
 | |
|         +cell The string's hash value.
 | |
| 
 | |
| 
 | |
| +h(2, "to_disk") StringStore.to_disk
 | |
|     +tag method
 | |
|     +tag-new(2)
 | |
| 
 | |
| p Save the current state to a directory.
 | |
| 
 | |
| +aside-code("Example").
 | |
|     stringstore.to_disk('/path/to/strings')
 | |
| 
 | |
| +table(["Name", "Type", "Description"])
 | |
|     +row
 | |
|         +cell #[code path]
 | |
|         +cell unicode or #[code Path]
 | |
|         +cell
 | |
|             |  A path to a directory, which will be created if it doesn't exist.
 | |
|             |  Paths may be either strings or #[code Path]-like objects.
 | |
| 
 | |
| +h(2, "from_disk") StringStore.from_disk
 | |
|     +tag method
 | |
|     +tag-new(2)
 | |
| 
 | |
| p Load state from a directory. Modifies the object in place and returns it.
 | |
| 
 | |
| +aside-code("Example").
 | |
|     from spacy.strings import StringStore
 | |
|     stringstore = StringStore().from_disk('/path/to/strings')
 | |
| 
 | |
| +table(["Name", "Type", "Description"])
 | |
|     +row
 | |
|         +cell #[code path]
 | |
|         +cell unicode or #[code Path]
 | |
|         +cell
 | |
|             |  A path to a directory. Paths may be either strings or
 | |
|             |  #[code Path]-like objects.
 | |
| 
 | |
|     +row("foot")
 | |
|         +cell returns
 | |
|         +cell #[code StringStore]
 | |
|         +cell The modified #[code StringStore] object.
 | |
| 
 | |
| +h(2, "to_bytes") StringStore.to_bytes
 | |
|     +tag method
 | |
| 
 | |
| p Serialize the current state to a binary string.
 | |
| 
 | |
| +aside-code("Example").
 | |
|     store_bytes = stringstore.to_bytes()
 | |
| 
 | |
| +table(["Name", "Type", "Description"])
 | |
|     +row
 | |
|         +cell #[code **exclude]
 | |
|         +cell -
 | |
|         +cell Named attributes to prevent from being serialized.
 | |
| 
 | |
|     +row("foot")
 | |
|         +cell returns
 | |
|         +cell bytes
 | |
|         +cell The serialized form of the #[code StringStore] object.
 | |
| 
 | |
| +h(2, "from_bytes") StringStore.from_bytes
 | |
|     +tag method
 | |
| 
 | |
| p Load state from a binary string.
 | |
| 
 | |
| +aside-code("Example").
 | |
|     from spacy.strings import StringStore
 | |
|     store_bytes = stringstore.to_bytes()
 | |
|     new_store = StringStore().from_bytes(store_bytes)
 | |
| 
 | |
| +table(["Name", "Type", "Description"])
 | |
|     +row
 | |
|         +cell #[code bytes_data]
 | |
|         +cell bytes
 | |
|         +cell The data to load from.
 | |
| 
 | |
|     +row
 | |
|         +cell #[code **exclude]
 | |
|         +cell -
 | |
|         +cell Named attributes to prevent from being loaded.
 | |
| 
 | |
|     +row("foot")
 | |
|         +cell returns
 | |
|         +cell #[code StringStore]
 | |
|         +cell The #[code StringStore] object.
 | |
| 
 | |
| +h(2, "util") Utilities
 | |
| 
 | |
| +h(3, "hash_string") strings.hash_string
 | |
|     +tag function
 | |
| 
 | |
| p Get a 64-bit hash for a given string.
 | |
| 
 | |
| +aside-code("Example").
 | |
|     from spacy.strings import hash_string
 | |
|     assert hash_string(u'apple') == 8566208034543834098
 | |
| 
 | |
| +table(["Name", "Type", "Description"])
 | |
|     +row
 | |
|         +cell #[code string]
 | |
|         +cell unicode
 | |
|         +cell The string to hash.
 | |
| 
 | |
|     +row("foot")
 | |
|         +cell returns
 | |
|         +cell uint64
 | |
|         +cell The hash.
 |