mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 09:57:26 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			187 lines
		
	
	
		
			4.7 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			187 lines
		
	
	
		
			4.7 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
//- 💫 DOCS > API > STRINGSTORE
 | 
						|
 | 
						|
include ../../_includes/_mixins
 | 
						|
 | 
						|
p Map strings to and from integer IDs.
 | 
						|
 | 
						|
+h(2, "init") StringStore.__init__
 | 
						|
    +tag method
 | 
						|
 | 
						|
p
 | 
						|
    |  Create the #[code StringStore]. Note that a newly initialised store will
 | 
						|
    |  always include an empty string #[code ''] at position #[code 0].
 | 
						|
 | 
						|
+aside-code("Example").
 | 
						|
    from spacy.strings import StringStore
 | 
						|
    stringstore = StringStore([u'apple', u'orange'])
 | 
						|
 | 
						|
+table(["Name", "Type", "Description"])
 | 
						|
    +row
 | 
						|
        +cell #[code strings]
 | 
						|
        +cell iterable
 | 
						|
        +cell A sequence of unicode strings to add to the store.
 | 
						|
 | 
						|
    +footrow
 | 
						|
        +cell returns
 | 
						|
        +cell #[code StringStore]
 | 
						|
        +cell The newly constructed object.
 | 
						|
 | 
						|
+h(2, "len") StringStore.__len__
 | 
						|
    +tag method
 | 
						|
 | 
						|
p Get the number of strings in the store.
 | 
						|
 | 
						|
+aside-code("Example").
 | 
						|
    stringstore = StringStore([u'apple', u'orange'])
 | 
						|
    assert len(stringstore) == 2
 | 
						|
 | 
						|
+table(["Name", "Type", "Description"])
 | 
						|
    +footrow
 | 
						|
        +cell returns
 | 
						|
        +cell int
 | 
						|
        +cell The number of strings in the store.
 | 
						|
 | 
						|
+h(2, "getitem") StringStore.__getitem__
 | 
						|
    +tag method
 | 
						|
 | 
						|
p Retrieve a string from a given integer ID, or vice versa.
 | 
						|
 | 
						|
+aside-code("Example").
 | 
						|
    stringstore = StringStore([u'apple', u'orange'])
 | 
						|
    int_id = stringstore[u'apple'] # 1
 | 
						|
    assert stringstore[int_id] == u'apple'
 | 
						|
 | 
						|
+table(["Name", "Type", "Description"])
 | 
						|
    +row
 | 
						|
        +cell #[code string_or_id]
 | 
						|
        +cell bytes, unicode or int
 | 
						|
        +cell The value to encode.
 | 
						|
 | 
						|
    +footrow
 | 
						|
        +cell returns
 | 
						|
        +cell unicode or int
 | 
						|
        +cell The value to be retrieved.
 | 
						|
 | 
						|
+h(2, "contains") StringStore.__contains__
 | 
						|
    +tag method
 | 
						|
 | 
						|
p Check whether a string is in the store.
 | 
						|
 | 
						|
+aside-code("Example").
 | 
						|
    stringstore = StringStore([u'apple', u'orange'])
 | 
						|
    assert u'apple' in stringstore
 | 
						|
    assert u'cherry' not in stringstore
 | 
						|
 | 
						|
+table(["Name", "Type", "Description"])
 | 
						|
    +row
 | 
						|
        +cell #[code string]
 | 
						|
        +cell unicode
 | 
						|
        +cell The string to check.
 | 
						|
 | 
						|
    +footrow
 | 
						|
        +cell returns
 | 
						|
        +cell bool
 | 
						|
        +cell Whether the store contains the string.
 | 
						|
 | 
						|
+h(2, "iter") StringStore.__iter__
 | 
						|
    +tag method
 | 
						|
 | 
						|
p
 | 
						|
    |  Iterate over the strings in the store, in order. Note that a newly
 | 
						|
    |  initialised store will always include an empty string #[code ''] at
 | 
						|
    |  position #[code 0].
 | 
						|
 | 
						|
+aside-code("Example").
 | 
						|
    stringstore = StringStore([u'apple', u'orange'])
 | 
						|
    all_strings = [s for s in stringstore]
 | 
						|
    assert all_strings == [u'', u'apple', u'orange']
 | 
						|
 | 
						|
+table(["Name", "Type", "Description"])
 | 
						|
    +footrow
 | 
						|
        +cell yields
 | 
						|
        +cell unicode
 | 
						|
        +cell A string in the store.
 | 
						|
 | 
						|
+h(2, "to_disk") StringStore.to_disk
 | 
						|
    +tag method
 | 
						|
 | 
						|
p Save the current state to a directory.
 | 
						|
 | 
						|
+aside-code("Example").
 | 
						|
    stringstore.to_disk('/path/to/strings')
 | 
						|
 | 
						|
+table(["Name", "Type", "Description"])
 | 
						|
    +row
 | 
						|
        +cell #[code path]
 | 
						|
        +cell unicode or #[code Path]
 | 
						|
        +cell
 | 
						|
            |  A path to a directory, which will be created if it doesn't exist.
 | 
						|
            |  Paths may be either strings or #[code Path]-like objects.
 | 
						|
 | 
						|
+h(2, "from_disk") StringStore.from_disk
 | 
						|
    +tag method
 | 
						|
 | 
						|
p Load state from a directory. Modifies the object in place and returns it.
 | 
						|
 | 
						|
+aside-code("Example").
 | 
						|
    from spacy.strings import StringStore
 | 
						|
    stringstore = StringStore().from_disk('/path/to/strings')
 | 
						|
 | 
						|
+table(["Name", "Type", "Description"])
 | 
						|
    +row
 | 
						|
        +cell #[code path]
 | 
						|
        +cell unicode or #[code Path]
 | 
						|
        +cell
 | 
						|
            |  A path to a directory. Paths may be either strings or
 | 
						|
            |  #[code Path]-like objects.
 | 
						|
 | 
						|
    +footrow
 | 
						|
        +cell returns
 | 
						|
        +cell #[code StringStore]
 | 
						|
        +cell The modified #[code StringStore] object.
 | 
						|
 | 
						|
+h(2, "to_bytes") StringStore.to_bytes
 | 
						|
    +tag method
 | 
						|
 | 
						|
p Serialize the current state to a binary string.
 | 
						|
 | 
						|
+aside-code("Example").
 | 
						|
    store_bytes = stringstore.to_bytes()
 | 
						|
 | 
						|
+table(["Name", "Type", "Description"])
 | 
						|
    +row
 | 
						|
        +cell #[code **exclude]
 | 
						|
        +cell -
 | 
						|
        +cell Named attributes to prevent from being serialized.
 | 
						|
 | 
						|
    +footrow
 | 
						|
        +cell returns
 | 
						|
        +cell bytes
 | 
						|
        +cell The serialized form of the #[code StringStore] object.
 | 
						|
 | 
						|
+h(2, "from_bytes") StringStore.from_bytes
 | 
						|
    +tag method
 | 
						|
 | 
						|
p Load state from a binary string.
 | 
						|
 | 
						|
+aside-code("Example").
 | 
						|
    from spacy.strings import StringStore
 | 
						|
    store_bytes = stringstore.to_bytes()
 | 
						|
    new_store = StringStore().from_bytes(store_bytes)
 | 
						|
 | 
						|
+table(["Name", "Type", "Description"])
 | 
						|
    +row
 | 
						|
        +cell #[code bytes_data]
 | 
						|
        +cell bytes
 | 
						|
        +cell The data to load from.
 | 
						|
 | 
						|
    +row
 | 
						|
        +cell #[code **exclude]
 | 
						|
        +cell -
 | 
						|
        +cell Named attributes to prevent from being loaded.
 | 
						|
 | 
						|
    +footrow
 | 
						|
        +cell returns
 | 
						|
        +cell #[code StringStore]
 | 
						|
        +cell The #[code StringStore] object.
 |