mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-25 21:21:10 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			239 lines
		
	
	
		
			7.5 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			239 lines
		
	
	
		
			7.5 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
| ---
 | |
| title: StringStore
 | |
| tag: class
 | |
| source: spacy/strings.pyx
 | |
| ---
 | |
| 
 | |
| Look up strings by 64-bit hashes. As of v2.0, spaCy uses hash values instead of
 | |
| integer IDs. This ensures that strings always map to the same ID, even from
 | |
| different `StringStores`.
 | |
| 
 | |
| ## StringStore.\_\_init\_\_ {id="init",tag="method"}
 | |
| 
 | |
| Create the `StringStore`.
 | |
| 
 | |
| > #### Example
 | |
| >
 | |
| > ```python
 | |
| > from spacy.strings import StringStore
 | |
| > stringstore = StringStore(["apple", "orange"])
 | |
| > ```
 | |
| 
 | |
| | Name      | Description                                                            |
 | |
| | --------- | ---------------------------------------------------------------------- |
 | |
| | `strings` | A sequence of strings to add to the store. ~~Optional[Iterable[str]]~~ |
 | |
| 
 | |
| ## StringStore.\_\_len\_\_ {id="len",tag="method"}
 | |
| 
 | |
| Get the number of strings in the store.
 | |
| 
 | |
| > #### Example
 | |
| >
 | |
| > ```python
 | |
| > stringstore = StringStore(["apple", "orange"])
 | |
| > assert len(stringstore) == 2
 | |
| > ```
 | |
| 
 | |
| | Name        | Description                                 |
 | |
| | ----------- | ------------------------------------------- |
 | |
| | **RETURNS** | The number of strings in the store. ~~int~~ |
 | |
| 
 | |
| ## StringStore.\_\_getitem\_\_ {id="getitem",tag="method"}
 | |
| 
 | |
| Retrieve a string from a given hash. If a string is passed as the input, add it
 | |
| to the store and return its hash.
 | |
| 
 | |
| > #### Example
 | |
| >
 | |
| > ```python
 | |
| > stringstore = StringStore(["apple", "orange"])
 | |
| > apple_hash = stringstore["apple"]
 | |
| > assert apple_hash == 8566208034543834098
 | |
| > assert stringstore[apple_hash] == "apple"
 | |
| > ```
 | |
| 
 | |
| | Name             | Description                                                                  |
 | |
| | ---------------- | ---------------------------------------------------------------------------- |
 | |
| | `string_or_hash` | The hash value to look up or the string to store. ~~Union[str, int]~~        |
 | |
| | **RETURNS**      | The stored string or the hash of the newly added string. ~~Union[str, int]~~ |
 | |
| 
 | |
| ## StringStore.\_\_contains\_\_ {id="contains",tag="method"}
 | |
| 
 | |
| Check whether a string or a hash is in the store.
 | |
| 
 | |
| > #### Example
 | |
| >
 | |
| > ```python
 | |
| > stringstore = StringStore(["apple", "orange"])
 | |
| > assert "apple" in stringstore
 | |
| > assert not "cherry" in stringstore
 | |
| > ```
 | |
| 
 | |
| | Name             | Description                                             |
 | |
| | ---------------- | ------------------------------------------------------- |
 | |
| | `string_or_hash` | The string or hash to check. ~~Union[str, int]~~        |
 | |
| | **RETURNS**      | Whether the store contains the string or hash. ~~bool~~ |
 | |
| 
 | |
| ## StringStore.\_\_iter\_\_ {id="iter",tag="method"}
 | |
| 
 | |
| Iterate over the stored strings in insertion order.
 | |
| 
 | |
| > #### Example
 | |
| >
 | |
| > ```python
 | |
| > stringstore = StringStore(["apple", "orange"])
 | |
| > all_strings = [s for s in stringstore]
 | |
| > assert all_strings == ["apple", "orange"]
 | |
| > ```
 | |
| 
 | |
| | Name        | Description                    |
 | |
| | ----------- | ------------------------------ |
 | |
| | **YIELDS**  | A string in the store. ~~str~~ |
 | |
| 
 | |
| ## StringStore.items {id="items", tag="method", version="4"}
 | |
| 
 | |
| Iterate over the stored string-hash pairs in insertion order.
 | |
| 
 | |
| > #### Example
 | |
| >
 | |
| > ```python
 | |
| > stringstore = StringStore(["apple", "orange"])
 | |
| > all_strings_and_hashes = stringstore.items()
 | |
| > assert all_strings_and_hashes == [("apple", 8566208034543834098), ("orange", 2208928596161743350)]
 | |
| > ```
 | |
| 
 | |
| | Name        | Description                                            |
 | |
| | ----------- | ------------------------------------------------------ |
 | |
| | **RETURNS** | A list of string-hash pairs. ~~List[Tuple[str, int]]~~ |
 | |
| 
 | |
| ## StringStore.keys {id="keys", tag="method", version="4"}
 | |
| 
 | |
| Iterate over the stored strings in insertion order.
 | |
| 
 | |
| > #### Example
 | |
| >
 | |
| > ```python
 | |
| > stringstore = StringStore(["apple", "orange"])
 | |
| > all_strings = stringstore.keys()
 | |
| > assert all_strings == ["apple", "orange"]
 | |
| > ```
 | |
| 
 | |
| | Name        | Description                      |
 | |
| | ----------- | -------------------------------- |
 | |
| | **RETURNS** | A list of strings. ~~List[str]~~ |
 | |
| 
 | |
| ## StringStore.values {id="values", tag="method", version="4"}
 | |
| 
 | |
| Iterate over the stored string hashes in insertion order.
 | |
| 
 | |
| > #### Example
 | |
| >
 | |
| > ```python
 | |
| > stringstore = StringStore(["apple", "orange"])
 | |
| > all_hashes = stringstore.values()
 | |
| > assert all_hashes == [8566208034543834098, 2208928596161743350]
 | |
| > ```
 | |
| 
 | |
| | Name        | Description                            |
 | |
| | ----------- | -------------------------------------- |
 | |
| | **RETURNS** | A list of string hashes. ~~List[int]~~ |
 | |
| 
 | |
| ## StringStore.add {id="add", tag="method"}
 | |
| 
 | |
| Add a string to the `StringStore`.
 | |
| 
 | |
| > #### Example
 | |
| >
 | |
| > ```python
 | |
| > stringstore = StringStore(["apple", "orange"])
 | |
| > banana_hash = stringstore.add("banana")
 | |
| > assert len(stringstore) == 3
 | |
| > assert banana_hash == 2525716904149915114
 | |
| > assert stringstore[banana_hash] == "banana"
 | |
| > assert stringstore["banana"] == banana_hash
 | |
| > ```
 | |
| 
 | |
| | Name        | Description                      |
 | |
| | ----------- | -------------------------------- |
 | |
| | `string`    | The string to add. ~~str~~       |
 | |
| | **RETURNS** | The string's hash value. ~~int~~ |
 | |
| 
 | |
| ## StringStore.to_disk {id="to_disk",tag="method"}
 | |
| 
 | |
| Save the current state to a directory.
 | |
| 
 | |
| > #### Example
 | |
| >
 | |
| > ```python
 | |
| > stringstore.to_disk("/path/to/strings")
 | |
| > ```
 | |
| 
 | |
| | Name   | Description                                                                                                                                |
 | |
| | ------ | ------------------------------------------------------------------------------------------------------------------------------------------ |
 | |
| | `path` | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. ~~Union[str, Path]~~ |
 | |
| 
 | |
| ## StringStore.from_disk {id="from_disk",tag="method"}
 | |
| 
 | |
| Load state from a directory. Modifies the object in place and returns it.
 | |
| 
 | |
| > #### Example
 | |
| >
 | |
| > ```python
 | |
| > from spacy.strings import StringStore
 | |
| > stringstore = StringStore().from_disk("/path/to/strings")
 | |
| > ```
 | |
| 
 | |
| | Name        | Description                                                                                     |
 | |
| | ----------- | ----------------------------------------------------------------------------------------------- |
 | |
| | `path`      | A path to a directory. Paths may be either strings or `Path`-like objects. ~~Union[str, Path]~~ |
 | |
| | **RETURNS** | The modified `StringStore` object. ~~StringStore~~                                              |
 | |
| 
 | |
| ## StringStore.to_bytes {id="to_bytes",tag="method"}
 | |
| 
 | |
| Serialize the current state to a binary string.
 | |
| 
 | |
| > #### Example
 | |
| >
 | |
| > ```python
 | |
| > store_bytes = stringstore.to_bytes()
 | |
| > ```
 | |
| 
 | |
| | Name        | Description                                                |
 | |
| | ----------- | ---------------------------------------------------------- |
 | |
| | **RETURNS** | The serialized form of the `StringStore` object. ~~bytes~~ |
 | |
| 
 | |
| ## StringStore.from_bytes {id="from_bytes",tag="method"}
 | |
| 
 | |
| Load state from a binary string.
 | |
| 
 | |
| > #### Example
 | |
| >
 | |
| > ```python
 | |
| > from spacy.strings import StringStore
 | |
| > store_bytes = stringstore.to_bytes()
 | |
| > new_store = StringStore().from_bytes(store_bytes)
 | |
| > ```
 | |
| 
 | |
| | Name         | Description                               |
 | |
| | ------------ | ----------------------------------------- |
 | |
| | `bytes_data` | The data to load from. ~~bytes~~          |
 | |
| | **RETURNS**  | The `StringStore` object. ~~StringStore~~ |
 | |
| 
 | |
| ## Utilities {id="util"}
 | |
| 
 | |
| ### strings.hash_string {id="hash_string",tag="function"}
 | |
| 
 | |
| Get a 64-bit hash for a given string.
 | |
| 
 | |
| > #### Example
 | |
| >
 | |
| > ```python
 | |
| > from spacy.strings import hash_string
 | |
| > assert hash_string("apple") == 8566208034543834098
 | |
| > ```
 | |
| 
 | |
| | Name        | Description                 |
 | |
| | ----------- | --------------------------- |
 | |
| | `string`    | The string to hash. ~~str~~ |
 | |
| | **RETURNS** | The hash. ~~int~~           |
 |