mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-25 21:21:10 +03:00 
			
		
		
		
	* Draft docstring for HuffmanCodec
This commit is contained in:
		
							parent
							
								
									281f1faefb
								
							
						
					
					
						commit
						8d0f1d98da
					
				|  | @ -44,14 +44,26 @@ cdef Code bit_append(Code code, bint bit) nogil: | ||||||
|      |      | ||||||
| 
 | 
 | ||||||
| cdef class HuffmanCodec: | cdef class HuffmanCodec: | ||||||
|  |     """Create a Huffman code table, and use it to pack and unpack sequences into | ||||||
|  |     byte strings. Emphasis is on efficiency, so the API is quite strict: | ||||||
|  | 
 | ||||||
|  |     Messages will be encoded/decoded as indices that refer to the probability sequence. | ||||||
|  |     For instance, the sequence [5, 10, 8] indicates the 5th most frequent item, | ||||||
|  |     the 10th most frequent item, the 8th most frequent item.  The codec will add | ||||||
|  |     the EOL symbol to your message. An exception will be raised if you include | ||||||
|  |     the EOL symbol in your message. | ||||||
|  | 
 | ||||||
|  |     Arguments: | ||||||
|  |         probs (float[:]): A descending-sorted sequence of probabilities/weights. | ||||||
|  |           Must include a weight for an EOL symbol. | ||||||
|  | 
 | ||||||
|  |         eol (uint32_t): The index of the weight of the EOL symbol. | ||||||
|  |     """ | ||||||
|     cdef vector[Node] nodes |     cdef vector[Node] nodes | ||||||
|     cdef vector[Code] codes |     cdef vector[Code] codes | ||||||
|     cdef readonly float[:] probs |  | ||||||
|     cdef PreshMap table |  | ||||||
|     cdef uint32_t eol |     cdef uint32_t eol | ||||||
|     def __init__(self, probs, eol): |     def __init__(self, float[:] probs, uint32_t eol): | ||||||
|         self.eol = eol |         self.eol = eol | ||||||
|         self.probs = probs |  | ||||||
|         self.codes.resize(len(probs)) |         self.codes.resize(len(probs)) | ||||||
|         for i in range(len(self.codes)): |         for i in range(len(self.codes)): | ||||||
|             self.codes[i].bits = 0 |             self.codes[i].bits = 0 | ||||||
|  | @ -69,7 +81,7 @@ cdef class HuffmanCodec: | ||||||
|         cdef uint64_t one = 1 |         cdef uint64_t one = 1 | ||||||
|         cdef unsigned char i_of_byte = 0 |         cdef unsigned char i_of_byte = 0 | ||||||
|         cdef unsigned char i_of_code = 0 |         cdef unsigned char i_of_code = 0 | ||||||
|         for index in sequence: |         for index in list(sequence) + [self.eol]: | ||||||
|             code = self.codes[index] |             code = self.codes[index] | ||||||
|             for i_of_code in range(code.length): |             for i_of_code in range(code.length): | ||||||
|                 if code.bits & (one << i_of_code): |                 if code.bits & (one << i_of_code): | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user