* Draft docstring for HuffmanCodec

This commit is contained in:
Matthew Honnibal 2015-07-13 12:01:18 +02:00
parent 281f1faefb
commit 8d0f1d98da

View File

@ -44,14 +44,26 @@ cdef Code bit_append(Code code, bint bit) nogil:
cdef class HuffmanCodec: cdef class HuffmanCodec:
"""Create a Huffman code table, and use it to pack and unpack sequences into
byte strings. Emphasis is on efficiency, so the API is quite strict:
Messages will be encoded/decoded as indices that refer to the probability sequence.
For instance, the sequence [5, 10, 8] indicates the 5th most frequent item,
the 10th most frequent item, and the 8th most frequent item. The codec will add
the EOL symbol to your message. An exception will be raised if you include
the EOL symbol in your message.
Arguments:
probs (float[:]): A descending-sorted sequence of probabilities/weights.
Must include a weight for an EOL symbol.
eol (uint32_t): The index of the weight of the EOL symbol.
"""
cdef vector[Node] nodes cdef vector[Node] nodes
cdef vector[Code] codes cdef vector[Code] codes
cdef readonly float[:] probs
cdef PreshMap table
cdef uint32_t eol cdef uint32_t eol
def __init__(self, probs, eol): def __init__(self, float[:] probs, uint32_t eol):
self.eol = eol self.eol = eol
self.probs = probs
self.codes.resize(len(probs)) self.codes.resize(len(probs))
for i in range(len(self.codes)): for i in range(len(self.codes)):
self.codes[i].bits = 0 self.codes[i].bits = 0
@ -69,7 +81,7 @@ cdef class HuffmanCodec:
cdef uint64_t one = 1 cdef uint64_t one = 1
cdef unsigned char i_of_byte = 0 cdef unsigned char i_of_byte = 0
cdef unsigned char i_of_code = 0 cdef unsigned char i_of_code = 0
for index in sequence: for index in list(sequence) + [self.eol]:
code = self.codes[index] code = self.codes[index]
for i_of_code in range(code.length): for i_of_code in range(code.length):
if code.bits & (one << i_of_code): if code.bits & (one << i_of_code):