diff --git a/spacy/serialize.pxd b/spacy/serialize.pxd new file mode 100644 index 000000000..4c81ccccd --- /dev/null +++ b/spacy/serialize.pxd @@ -0,0 +1,23 @@ +from libcpp.vector cimport vector +from libc.stdint cimport uint32_t +from libc.stdint cimport int64_t +from libc.stdint cimport int32_t +from libc.stdint cimport uint64_t + + +cdef struct Node: + float prob + int32_t left + int32_t right + + +cdef struct Code: + uint64_t bits + char length + + +cdef class HuffmanCodec: + cdef vector[Node] nodes + cdef vector[Code] codes + cdef uint32_t eol + diff --git a/spacy/serialize.pyx b/spacy/serialize.pyx index 303b073db..07f9a95f8 100644 --- a/spacy/serialize.pyx +++ b/spacy/serialize.pyx @@ -19,17 +19,6 @@ cimport cython # combo. ? bits per word. 40 * 80 * 40 * 12 = 1.5m symbol vocab -cdef struct Node: - float prob - int32_t left - int32_t right - - -cdef struct Code: - uint64_t bits - char length - - # Note that we're setting the most significant bits here first, when in practice # we're actually wanting the last bit to be most significant (for Huffman coding, # anyway). @@ -90,9 +79,6 @@ cdef class HuffmanCodec: eol (uint32_t): The index of the weight of the EOL symbol. """ - cdef vector[Node] nodes - cdef vector[Code] codes - cdef uint32_t eol def __init__(self, float[:] probs, uint32_t eol): self.eol = eol self.codes.resize(len(probs))