mirror of
https://github.com/explosion/spaCy.git
synced 2025-07-11 16:52:21 +03:00
* Make .pxd file for huffman codec
This commit is contained in:
parent
0628e0e2a8
commit
77385d5580
23
spacy/serialize.pxd
Normal file
23
spacy/serialize.pxd
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
from libcpp.vector cimport vector
|
||||||
|
from libc.stdint cimport uint32_t
|
||||||
|
from libc.stdint cimport int64_t
|
||||||
|
from libc.stdint cimport int32_t
|
||||||
|
from libc.stdint cimport uint64_t
|
||||||
|
|
||||||
|
|
||||||
|
cdef struct Node:
|
||||||
|
float prob
|
||||||
|
int32_t left
|
||||||
|
int32_t right
|
||||||
|
|
||||||
|
|
||||||
|
cdef struct Code:
|
||||||
|
uint64_t bits
|
||||||
|
char length
|
||||||
|
|
||||||
|
|
||||||
|
cdef class HuffmanCodec:
|
||||||
|
cdef vector[Node] nodes
|
||||||
|
cdef vector[Code] codes
|
||||||
|
cdef uint32_t eol
|
||||||
|
|
|
@ -19,17 +19,6 @@ cimport cython
|
||||||
# combo. ? bits per word. 40 * 80 * 40 * 12 = 1.5m symbol vocab
|
# combo. ? bits per word. 40 * 80 * 40 * 12 = 1.5m symbol vocab
|
||||||
|
|
||||||
|
|
||||||
cdef struct Node:
|
|
||||||
float prob
|
|
||||||
int32_t left
|
|
||||||
int32_t right
|
|
||||||
|
|
||||||
|
|
||||||
cdef struct Code:
|
|
||||||
uint64_t bits
|
|
||||||
char length
|
|
||||||
|
|
||||||
|
|
||||||
# Note that we're setting the most significant bits here first, when in practice
|
# Note that we're setting the most significant bits here first, when in practice
|
||||||
# we're actually wanting the last bit to be most significant (for Huffman coding,
|
# we're actually wanting the last bit to be most significant (for Huffman coding,
|
||||||
# anyway).
|
# anyway).
|
||||||
|
@ -90,9 +79,6 @@ cdef class HuffmanCodec:
|
||||||
|
|
||||||
eol (uint32_t): The index of the weight of the EOL symbol.
|
eol (uint32_t): The index of the weight of the EOL symbol.
|
||||||
"""
|
"""
|
||||||
cdef vector[Node] nodes
|
|
||||||
cdef vector[Code] codes
|
|
||||||
cdef uint32_t eol
|
|
||||||
def __init__(self, float[:] probs, uint32_t eol):
|
def __init__(self, float[:] probs, uint32_t eol):
|
||||||
self.eol = eol
|
self.eol = eol
|
||||||
self.codes.resize(len(probs))
|
self.codes.resize(len(probs))
|
||||||
|
|
Loading…
Reference in New Issue
Block a user